diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..c317bd563e346bd301f844dc3854a18122f56bec 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c5356b25f4729321651d039cb96c8e0d682c4a8 --- /dev/null +++ b/README.md @@ -0,0 +1,148 @@ +--- +license: gemma +library_name: coreml +base_model: google/gemma-4-E4B-it +tags: + - coreml + - apple-silicon + - ane + - on-device + - gemma-4 + - multimodal + - vision + - audio +pipeline_tag: image-text-to-text +--- + +## Use it from Swift + + +### Add the package + +`Package.swift`: + +```swift +.package(url: "https://github.com/john-rocky/CoreML-LLM", branch: "main"), + +// In your target: +.product(name: "CoreMLLLM", package: "CoreML-LLM"), +``` + +Platforms: iOS 18+ / macOS 15+. + +### Download + chat (one call, text + image + audio) + +```swift +import CoreMLLLM + +// First call pulls the bundle from this repo to Documents/Models/. +let llm = try await CoreMLLLM.load(repo: "mlboydaisuke/gemma-4-E4B-multimodal-coreml") + +// Text-only +let stream = try await llm.generate( + [CoreMLLLM.Message(role: .user, content: "Hello!")], + maxTokens: 256 +) +for await chunk in stream { print(chunk, terminator: "") } + +// Image + text +let image: CGImage = // ... your image +let stream2 = try await llm.generate( + [CoreMLLLM.Message(role: .user, content: "Describe this picture.")], + image: image, maxTokens: 256) + +// Audio + text (16 kHz mono PCM Float) +let pcm: [Float] = // ... 
your audio samples +let stream3 = try await llm.generate( + [CoreMLLLM.Message(role: .user, content: "What language is this?")], + audio: pcm, maxTokens: 256) +``` + +Set the Xcode scheme env var `LLM_VISION_FORCE_ANE=1` to route the vision encoder through the Apple Neural Engine (the encoder is built ANE-targeted and emits 256 tokens per image at the LM hidden dimension). + + +# Gemma 4 E4B (multimodal) — Core ML (INT4, Apple Neural Engine) + +Core ML port of [`google/gemma-4-E4B-it`](https://huggingface.co/google/gemma-4-E4B-it) with vision (still image), video, and audio (Conformer) encoders. The sliding-window-attention chunks and the vision encoder target the Apple Neural Engine; the audio encoder runs on the GPU, followed by a small Swift/Accelerate projection sidecar. + +**iPhone 17 Pro validated 2026-05-03** — text decode **15.7 tok/s** with correct outputs across all four input modalities (text / image / video / audio). + +Built from [`john-rocky/CoreML-LLM`](https://github.com/john-rocky/CoreML-LLM); see [`docs/E4B_MULTIMODAL_BUILD.md`](https://github.com/john-rocky/CoreML-LLM/blob/main/docs/E4B_MULTIMODAL_BUILD.md) for the full reproduction guide and [`scripts/assemble_gemma4_e4b_multimodal.sh`](https://github.com/john-rocky/CoreML-LLM/blob/main/scripts/assemble_gemma4_e4b_multimodal.sh) for the assembly script. 
+ +## Files + +``` +# Decode chunks (3-chunk Topology II — auto-detected by ChunkedEngine) +chunk1.mlmodelc/ # L0-11 — own KV +chunk2_3way.mlmodelc/ # L12-32 — merged 21 layers (own + KV-shared internal) +chunk3_3way.mlmodelc/ # L33-41 + lm_head + argmax + +# Prefill chunks (legacy 4-chunk with prefill_b8 multifunction inside) +chunk2.mlmodelc/ # L12-22 prefill (own KV writes via recurrent shift) +chunk3.mlmodelc/ # L23-32 prefill (KV-shared) +chunk4.mlmodelc/ # L33-41 prefill + lm_head + +# Vision encoder (ANE-targeted) +vision.ane.mlmodelc/ # SigLIP, output [1, 256, 2560] + +# Audio encoder + Swift projection sidecars +audio.mlmodelc/ # Conformer, output [1, 50, 1024] +audio_config.json +mel_filterbank.bin +output_proj_weight.npy # 1024 -> 1536 (audio_soft_token_size) +output_proj_bias.npy +embed_proj_weight.npy # 1536 -> 2560 (LM hidden) — E4B-specific shape + +# Token / per-layer embeddings (mmap'd, dequantised on demand by Swift) +embed_tokens_q8.bin 640 MB — INT8 token embeddings (262144 x 2560) +embed_tokens_scales.bin 512 KB +embed_tokens_per_layer_q8.bin 2.6 GB — INT8 per-layer embeddings (PLE) +embed_tokens_per_layer_scales.bin 512 KB +per_layer_projection.bin 53 MB +per_layer_norm_weight.bin 512 B + +# RoPE cos/sin tables (pre-baked, mmap'd) +cos_sliding.npy / sin_sliding.npy +cos_full.npy / sin_full.npy + +# Tokenizer + runtime config +hf_model/ + tokenizer.json, tokenizer_config.json, config.json, generation_config.json +model_config.json +``` + +Total bundle size: **~7.6 GB**. 
+ +## Engine path on iPhone (what runs where) + +| Stage | Compute | Files used | +|---|---|---| +| Token / PLE embed lookup | Swift CPU (mmap) | `embed_tokens*.bin`, `per_layer_*.bin` | +| Decode (T=1) | ANE | `chunk1` + `chunk2_3way` + `chunk3_3way` | +| Prefill (batched, T=8) | ANE | `chunk1` + `chunk2` + `chunk3` + `chunk4` (`prefill_b8` multifunction) | +| Vision encoder | ANE | `vision.ane.mlmodelc` (with `LLM_VISION_FORCE_ANE=1`) | +| Audio encoder | GPU | `audio.mlmodelc` | +| Audio projection (1024 → 1536 → 2560) | Swift / Accelerate | `output_proj_*.npy`, `embed_proj_weight.npy` | + +The Swift runtime auto-detects Topology II by the presence of `chunk2_3way` + `chunk3_3way` and routes prefill through the legacy 4-chunk `prefill_b8` multifunction (the engine's `fillBatchMasksVisionAware` keeps bidirectional within-image attention working at `T=8` batches). + +## Why so many sidecars (vs a single `model.mlpackage`)? + +Gemma 4 E-series uses a per-layer embedding (PLE) bank that's much larger than the token embedding (2.6 GB vs 640 MB for E4B). Loading PLE through Core ML would dequantize the entire bank into the CPU heap and blow up `phys_footprint`. We mmap the raw INT8 + scale `.bin` files instead, dequantize the few rows touched per token in pure Swift, and feed the result to the chunks. The chunks themselves are pure transformer bodies and stay ANE-resident. + +The `.npy` RoPE tables are pre-baked at conversion time so Swift doesn't need to ship a `cos`/`sin` builder. + +The audio projection (`output_proj_*` / `embed_proj_weight`) runs in Swift, outside the Core ML audio model, because a Core ML GPU runtime bug makes `RMSNorm(with_scale=False)` produce all-zero outputs. An Accelerate `sgemm` on the CPU is fast enough for this step. + +## Tokenizer + +The Gemma 4 SentencePiece tokenizer ships in `hf_model/`. 
Three multimodal placeholder token IDs: +- `<|image|>` = 258880 — image-pad span (256 per still image) +- `<|audio|>` = 258881 — audio-pad span (~188 per 2 sec) +- `<|video|>` = 258884 — video-pad span (64 per frame) + +Vision encoder output rows replace `<|image|>`/`<|video|>` rows during prefill (and per-token at decode for tail spans). Audio output rows replace `<|audio|>`. `per_layer_raw` is forced to zero at multimodal positions — the chunks compute `per_layer_combined` entirely from the spliced hidden state. + +## License + +This is a derivative work of `google/gemma-4-E4B-it`. Use is governed by the [Gemma Terms of Use](https://ai.google.dev/gemma/terms). Vision / audio extensions inherit the same license. diff --git a/audio.mlmodelc/analytics/coremldata.bin b/audio.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..59e1d0c6e55befa826e7bcd344f394de14edba8c --- /dev/null +++ b/audio.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea515c53f416101ef42bce8f1a9ac1be59d838914747ecba22b70ead41039ee5 +size 243 diff --git a/audio.mlmodelc/coremldata.bin b/audio.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..16fe0ef528b7abe2e3186429da3258897a48e0b9 --- /dev/null +++ b/audio.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda961a2bf20e093c8fe82e55240512265ee10f1cf48078033ebc972298750b9 +size 390 diff --git a/audio.mlmodelc/metadata.json b/audio.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5bcd568fff1880133868c49c51a8ca62ce9d8ab6 --- /dev/null +++ b/audio.mlmodelc/metadata.json @@ -0,0 +1,84 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Int32, Palettized (10 bits), Palettized (11 bits), Palettized (4 bits), Palettized (9 bits), UInt4)", + "outputSchema" : [ + { + "hasShapeFlexibility" 
: "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 50 × 1024)", + "shortDescription" : "", + "shape" : "[1, 50, 1024]", + "name" : "hidden_states", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.expandDims" : 1, + "Ios18.mul" : 312, + "Ios18.softmax" : 12, + "Ios18.matmul" : 36, + "Ios16.reduceMean" : 108, + "Ios18.sigmoid" : 12, + "Split" : 12, + "Select" : 12, + "Ios18.add" : 168, + "Ios18.layerNorm" : 2, + "Ios18.reshape" : 109, + "Pad" : 60, + "Ios18.constexprLutToDense" : 134, + "Ios18.linear" : 121, + "Ios18.conv" : 14, + "Ios18.relu" : 2, + "Ios18.clip" : 312, + "Ios18.silu" : 36, + "Stack" : 24, + "Ios18.pow" : 216, + "Ios18.cast" : 540, + "Ios18.transpose" : 75, + "Ios18.tanh" : 12, + "Ios18.sliceByIndex" : 144 + }, + "computePrecision" : "Mixed (Float16, Float32, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.conversion_date" : "2026-04-30", + "com.github.apple.coremltools.source" : "torch==2.11.0", + "com.github.apple.coremltools.version" : "9.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 200 × 128)", + "shortDescription" : "", + "shape" : "[1, 200, 128]", + "name" : "input_features", + "type" : "MultiArray" + } + ], + "generatedClassName" : "audio", + "method" : "predict" + } +] \ No newline at end of file diff --git a/audio.mlmodelc/model.mil b/audio.mlmodelc/model.mil new file mode 100644 index 
0000000000000000000000000000000000000000..fe3ef7711704ba8b9d39d75076addabb513e08a6 --- /dev/null +++ b/audio.mlmodelc/model.mil @@ -0,0 +1,5325 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func main(tensor input_features) { + tensor hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor([1])]; + tensor hidden_states_1_cast_fp16 = expand_dims(axes = hidden_states_1_axes_0, x = input_features)[name = string("hidden_states_1_cast_fp16")]; + string hidden_states_3_pad_type_0 = const()[name = string("hidden_states_3_pad_type_0"), val = string("custom")]; + tensor hidden_states_3_pad_0 = const()[name = string("hidden_states_3_pad_0"), val = tensor([1, 1, 1, 1])]; + tensor hidden_states_3_strides_0 = const()[name = string("hidden_states_3_strides_0"), val = tensor([2, 2])]; + tensor hidden_states_3_dilations_0 = const()[name = string("hidden_states_3_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_3_groups_0 = const()[name = string("hidden_states_3_groups_0"), val = int32(1)]; + tensor subsample_conv_projection_layer0_conv_weight_to_fp16 = const()[name = string("subsample_conv_projection_layer0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor hidden_states_3_cast_fp16 = conv(dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = subsample_conv_projection_layer0_conv_weight_to_fp16, x = hidden_states_1_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor var_198 = const()[name = string("op_198"), val = tensor([0, 2, 3, 1])]; + tensor var_202_axes_0 = const()[name = string("op_202_axes_0"), val = tensor([-1])]; + tensor subsample_conv_projection_layer0_norm_weight_to_fp16 = const()[name = 
string("subsample_conv_projection_layer0_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2432)))]; + fp16 var_182_to_fp16 = const()[name = string("op_182_to_fp16"), val = fp16(0x1.1p-20)]; + tensor input_3_cast_fp16 = transpose(perm = var_198, x = hidden_states_3_cast_fp16)[name = string("transpose_74")]; + tensor var_202_cast_fp16 = layer_norm(axes = var_202_axes_0, epsilon = var_182_to_fp16, gamma = subsample_conv_projection_layer0_norm_weight_to_fp16, x = input_3_cast_fp16)[name = string("op_202_cast_fp16")]; + tensor var_203 = const()[name = string("op_203"), val = tensor([0, 3, 1, 2])]; + tensor var_204_cast_fp16 = transpose(perm = var_203, x = var_202_cast_fp16)[name = string("transpose_73")]; + tensor hidden_states_5_cast_fp16 = relu(x = var_204_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("custom")]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([1, 1, 1, 1])]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([2, 2])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor subsample_conv_projection_layer1_conv_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21248))))[name = string("subsample_conv_projection_layer1_conv_weight_to_fp16_palettized")]; + tensor hidden_states_7_cast_fp16 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = 
hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = subsample_conv_projection_layer1_conv_weight_to_fp16_palettized, x = hidden_states_5_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_216 = const()[name = string("op_216"), val = tensor([0, 2, 3, 1])]; + tensor var_220_axes_0 = const()[name = string("op_220_axes_0"), val = tensor([-1])]; + tensor subsample_conv_projection_layer1_norm_weight_to_fp16 = const()[name = string("subsample_conv_projection_layer1_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21376)))]; + tensor input_9_cast_fp16 = transpose(perm = var_216, x = hidden_states_7_cast_fp16)[name = string("transpose_72")]; + tensor var_220_cast_fp16 = layer_norm(axes = var_220_axes_0, epsilon = var_182_to_fp16, gamma = subsample_conv_projection_layer1_norm_weight_to_fp16, x = input_9_cast_fp16)[name = string("op_220_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = relu(x = var_220_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor var_230 = const()[name = string("op_230"), val = tensor([1, 50, -1])]; + tensor input_13_cast_fp16 = reshape(shape = var_230, x = hidden_states_9_cast_fp16)[name = string("input_13_cast_fp16")]; + tensor subsample_conv_projection_input_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545856))))[name = string("subsample_conv_projection_input_proj_linear_weight_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546944)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = subsample_conv_projection_input_proj_linear_weight_to_fp16_palettized, x = 
input_13_cast_fp16)[name = string("linear_0_cast_fp16")]; + string linear_0_cast_fp16_to_fp32_dtype_0 = const()[name = string("linear_0_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_242 = const()[name = string("op_242"), val = fp32(-0x1p-1)]; + fp32 var_243 = const()[name = string("op_243"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_244 = const()[name = string("op_244"), val = fp32(-0x1.2a05f2p+33)]; + tensor linear_0_cast_fp16_to_fp32 = cast(dtype = linear_0_cast_fp16_to_fp32_dtype_0, x = linear_0_cast_fp16)[name = string("cast_539")]; + tensor clip_0 = clip(alpha = var_244, beta = var_243, x = linear_0_cast_fp16_to_fp32)[name = string("clip_0")]; + fp32 var_238_promoted = const()[name = string("op_238_promoted"), val = fp32(0x1p+1)]; + tensor var_252 = pow(x = clip_0, y = var_238_promoted)[name = string("op_252")]; + tensor var_254_axes_0 = const()[name = string("op_254_axes_0"), val = tensor([-1])]; + bool var_254_keep_dims_0 = const()[name = string("op_254_keep_dims_0"), val = bool(true)]; + tensor var_254 = reduce_mean(axes = var_254_axes_0, keep_dims = var_254_keep_dims_0, x = var_252)[name = string("op_254")]; + string var_254_to_fp16_dtype_0 = const()[name = string("op_254_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_255_to_fp16 = const()[name = string("op_255_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_254_to_fp16 = cast(dtype = var_254_to_fp16_dtype_0, x = var_254)[name = string("cast_538")]; + tensor mean_squared_1_cast_fp16 = add(x = var_254_to_fp16, y = var_255_to_fp16)[name = string("mean_squared_1_cast_fp16")]; + string mean_squared_1_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_1_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_1_cast_fp16_to_fp32 = cast(dtype = mean_squared_1_cast_fp16_to_fp32_dtype_0, x = mean_squared_1_cast_fp16)[name = string("cast_537")]; + tensor var_257 = pow(x = mean_squared_1_cast_fp16_to_fp32, y = var_242)[name = string("op_257")]; + string 
clip_0_to_fp16_dtype_0 = const()[name = string("clip_0_to_fp16_dtype_0"), val = string("fp16")]; + string var_257_to_fp16_dtype_0 = const()[name = string("op_257_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_0_to_fp16 = cast(dtype = clip_0_to_fp16_dtype_0, x = clip_0)[name = string("cast_535")]; + tensor var_257_to_fp16 = cast(dtype = var_257_to_fp16_dtype_0, x = var_257)[name = string("cast_536")]; + tensor normed_output_1_cast_fp16 = mul(x = clip_0_to_fp16, y = var_257_to_fp16)[name = string("normed_output_1_cast_fp16")]; + tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549056)))]; + tensor normed_output_3_cast_fp16 = mul(x = normed_output_1_cast_fp16, y = const_2_to_fp16)[name = string("normed_output_3_cast_fp16")]; + fp16 feed_forward1s_0_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.9cp+3)]; + fp16 feed_forward1s_0_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.9ap+3)]; + tensor clip_1_cast_fp16 = clip(alpha = feed_forward1s_0_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_0_ffw_layer_1_input_max_to_fp16, x = normed_output_3_cast_fp16)[name = string("clip_1_cast_fp16")]; + tensor feed_forward1s_0_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2648384))))[name = string("feed_forward1s_0_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2652544)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, 
weight = feed_forward1s_0_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_1_cast_fp16)[name = string("linear_1_cast_fp16")]; + fp16 feed_forward1s_0_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.4p+5)]; + fp16 feed_forward1s_0_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.3ep+5)]; + tensor clip_2_cast_fp16 = clip(alpha = feed_forward1s_0_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_0_ffw_layer_1_output_max_to_fp16, x = linear_1_cast_fp16)[name = string("clip_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = silu(x = clip_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + fp16 feed_forward1s_0_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.66p+3)]; + fp16 feed_forward1s_0_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.62p+3)]; + tensor clip_3_cast_fp16 = clip(alpha = feed_forward1s_0_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_0_ffw_layer_2_input_max_to_fp16, x = hidden_states_21_cast_fp16)[name = string("clip_3_cast_fp16")]; + tensor feed_forward1s_0_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2660800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4758016))))[name = string("feed_forward1s_0_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_0_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_3_cast_fp16)[name = string("linear_2_cast_fp16")]; + fp16 feed_forward1s_0_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_2_output_min_to_fp16"), val = 
fp16(-0x1.12p+5)]; + fp16 feed_forward1s_0_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_0_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.1p+5)]; + tensor clip_4_cast_fp16 = clip(alpha = feed_forward1s_0_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_0_ffw_layer_2_output_max_to_fp16, x = linear_2_cast_fp16)[name = string("clip_4_cast_fp16")]; + string clip_4_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_4_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_4_cast_fp16_to_fp32 = cast(dtype = clip_4_cast_fp16_to_fp32_dtype_0, x = clip_4_cast_fp16)[name = string("cast_534")]; + tensor clip_5 = clip(alpha = var_244, beta = var_243, x = clip_4_cast_fp16_to_fp32)[name = string("clip_5")]; + fp32 var_238_promoted_1 = const()[name = string("op_238_promoted_1"), val = fp32(0x1p+1)]; + tensor var_284 = pow(x = clip_5, y = var_238_promoted_1)[name = string("op_284")]; + tensor var_286_axes_0 = const()[name = string("op_286_axes_0"), val = tensor([-1])]; + bool var_286_keep_dims_0 = const()[name = string("op_286_keep_dims_0"), val = bool(true)]; + tensor var_286 = reduce_mean(axes = var_286_axes_0, keep_dims = var_286_keep_dims_0, x = var_284)[name = string("op_286")]; + string var_286_to_fp16_dtype_0 = const()[name = string("op_286_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_287_to_fp16 = const()[name = string("op_287_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_286_to_fp16 = cast(dtype = var_286_to_fp16_dtype_0, x = var_286)[name = string("cast_533")]; + tensor mean_squared_3_cast_fp16 = add(x = var_286_to_fp16, y = var_287_to_fp16)[name = string("mean_squared_3_cast_fp16")]; + string mean_squared_3_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_3_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_3_cast_fp16_to_fp32 = cast(dtype = mean_squared_3_cast_fp16_to_fp32_dtype_0, x = mean_squared_3_cast_fp16)[name = string("cast_532")]; + tensor var_289 = pow(x = 
mean_squared_3_cast_fp16_to_fp32, y = var_242)[name = string("op_289")]; + string clip_5_to_fp16_dtype_0 = const()[name = string("clip_5_to_fp16_dtype_0"), val = string("fp16")]; + string var_289_to_fp16_dtype_0 = const()[name = string("op_289_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_5_to_fp16 = cast(dtype = clip_5_to_fp16_dtype_0, x = clip_5)[name = string("cast_530")]; + tensor var_289_to_fp16 = cast(dtype = var_289_to_fp16_dtype_0, x = var_289)[name = string("cast_531")]; + tensor normed_output_5_cast_fp16 = mul(x = clip_5_to_fp16, y = var_289_to_fp16)[name = string("normed_output_5_cast_fp16")]; + tensor const_3_to_fp16 = const()[name = string("const_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4759104)))]; + tensor normed_output_7_cast_fp16 = mul(x = normed_output_5_cast_fp16, y = const_3_to_fp16)[name = string("normed_output_7_cast_fp16")]; + fp16 var_234_to_fp16 = const()[name = string("op_234_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_33_cast_fp16 = mul(x = normed_output_7_cast_fp16, y = var_234_to_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor hidden_states_35_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_0_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; + fp16 var_296_to_fp16 = const()[name = string("op_296_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_297_to_fp16 = const()[name = string("op_297_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_6_cast_fp16 = clip(alpha = var_296_to_fp16, beta = var_297_to_fp16, x = hidden_states_35_cast_fp16)[name = string("clip_6_cast_fp16")]; + string clip_6_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_6_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_299 = const()[name = string("op_299"), val = fp32(-0x1p-1)]; + fp32 var_303_promoted = const()[name = string("op_303_promoted"), val = fp32(0x1p+1)]; + tensor clip_6_cast_fp16_to_fp32 = cast(dtype = 
clip_6_cast_fp16_to_fp32_dtype_0, x = clip_6_cast_fp16)[name = string("cast_529")]; + tensor var_309 = pow(x = clip_6_cast_fp16_to_fp32, y = var_303_promoted)[name = string("op_309")]; + tensor var_311_axes_0 = const()[name = string("op_311_axes_0"), val = tensor([-1])]; + bool var_311_keep_dims_0 = const()[name = string("op_311_keep_dims_0"), val = bool(true)]; + tensor var_311 = reduce_mean(axes = var_311_axes_0, keep_dims = var_311_keep_dims_0, x = var_309)[name = string("op_311")]; + string var_311_to_fp16_dtype_0 = const()[name = string("op_311_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_311_to_fp16 = cast(dtype = var_311_to_fp16_dtype_0, x = var_311)[name = string("cast_528")]; + tensor mean_squared_5_cast_fp16 = add(x = var_311_to_fp16, y = var_312_to_fp16)[name = string("mean_squared_5_cast_fp16")]; + string mean_squared_5_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_5_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_5_cast_fp16_to_fp32 = cast(dtype = mean_squared_5_cast_fp16_to_fp32_dtype_0, x = mean_squared_5_cast_fp16)[name = string("cast_527")]; + tensor var_314 = pow(x = mean_squared_5_cast_fp16_to_fp32, y = var_299)[name = string("op_314")]; + string var_314_to_fp16_dtype_0 = const()[name = string("op_314_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_314_to_fp16 = cast(dtype = var_314_to_fp16_dtype_0, x = var_314)[name = string("cast_526")]; + tensor normed_output_9_cast_fp16 = mul(x = clip_6_cast_fp16, y = var_314_to_fp16)[name = string("normed_output_9_cast_fp16")]; + tensor const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4761216)))]; + tensor normed_output_11_cast_fp16 = mul(x = normed_output_9_cast_fp16, y = const_4_to_fp16)[name = string("normed_output_11_cast_fp16")]; + int32 var_320 = 
const()[name = string("op_320"), val = int32(-1)]; + fp32 var_321 = const()[name = string("op_321"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_0_q_proj_input_min_to_fp16 = const()[name = string("self_attns_0_q_proj_input_min_to_fp16"), val = fp16(-0x1.46p+4)]; + fp16 self_attns_0_q_proj_input_max_to_fp16 = const()[name = string("self_attns_0_q_proj_input_max_to_fp16"), val = fp16(0x1.44p+4)]; + tensor clip_7_cast_fp16 = clip(alpha = self_attns_0_q_proj_input_min_to_fp16, beta = self_attns_0_q_proj_input_max_to_fp16, x = normed_output_11_cast_fp16)[name = string("clip_7_cast_fp16")]; + tensor self_attns_0_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4763328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5287680))))[name = string("self_attns_0_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_0_q_proj_linear_weight_to_fp16_palettized, x = clip_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + fp16 self_attns_0_q_proj_output_min_to_fp16 = const()[name = string("self_attns_0_q_proj_output_min_to_fp16"), val = fp16(-0x1.14p+5)]; + fp16 self_attns_0_q_proj_output_max_to_fp16 = const()[name = string("self_attns_0_q_proj_output_max_to_fp16"), val = fp16(0x1.12p+5)]; + tensor clip_8_cast_fp16 = clip(alpha = self_attns_0_q_proj_output_min_to_fp16, beta = self_attns_0_q_proj_output_max_to_fp16, x = linear_3_cast_fp16)[name = string("clip_8_cast_fp16")]; + tensor var_365 = const()[name = string("op_365"), val = tensor([1, 50, 8, 128])]; + tensor q_1_cast_fp16 = reshape(shape = var_365, x = clip_8_cast_fp16)[name = string("q_1_cast_fp16")]; + tensor self_attns_0_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5288768))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5813120))))[name = string("self_attns_0_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_0_k_proj_linear_weight_to_fp16_palettized, x = clip_7_cast_fp16)[name = string("linear_4_cast_fp16")]; + fp16 self_attns_0_k_proj_output_min_to_fp16 = const()[name = string("self_attns_0_k_proj_output_min_to_fp16"), val = fp16(-0x1.14p+5)]; + fp16 self_attns_0_k_proj_output_max_to_fp16 = const()[name = string("self_attns_0_k_proj_output_max_to_fp16"), val = fp16(0x1.12p+5)]; + tensor clip_10_cast_fp16 = clip(alpha = self_attns_0_k_proj_output_min_to_fp16, beta = self_attns_0_k_proj_output_max_to_fp16, x = linear_4_cast_fp16)[name = string("clip_10_cast_fp16")]; + tensor var_377 = const()[name = string("op_377"), val = tensor([1, 50, 8, 128])]; + tensor k_1_cast_fp16 = reshape(shape = var_377, x = clip_10_cast_fp16)[name = string("k_1_cast_fp16")]; + tensor self_attns_0_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5814208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6338560))))[name = string("self_attns_0_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_0_v_proj_linear_weight_to_fp16_palettized, x = clip_7_cast_fp16)[name = string("linear_5_cast_fp16")]; + fp16 self_attns_0_v_proj_output_min_to_fp16 = const()[name = string("self_attns_0_v_proj_output_min_to_fp16"), val = fp16(-0x1.14p+5)]; + fp16 self_attns_0_v_proj_output_max_to_fp16 = const()[name = string("self_attns_0_v_proj_output_max_to_fp16"), val = fp16(0x1.12p+5)]; + tensor clip_12_cast_fp16 = clip(alpha = self_attns_0_v_proj_output_min_to_fp16, beta = self_attns_0_v_proj_output_max_to_fp16, x = linear_5_cast_fp16)[name = 
string("clip_12_cast_fp16")]; + tensor var_389 = const()[name = string("op_389"), val = tensor([1, 50, 8, 128])]; + tensor input_31_cast_fp16 = reshape(shape = var_389, x = clip_12_cast_fp16)[name = string("input_31_cast_fp16")]; + fp16 var_391_to_fp16 = const()[name = string("op_391_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_392_cast_fp16 = mul(x = q_1_cast_fp16, y = var_391_to_fp16)[name = string("op_392_cast_fp16")]; + tensor var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6339648)))]; + tensor input_27_cast_fp16 = mul(x = var_392_cast_fp16, y = var_393_to_fp16)[name = string("input_27_cast_fp16")]; + fp16 var_395_to_fp16 = const()[name = string("op_395_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_29_cast_fp16 = mul(x = k_1_cast_fp16, y = var_395_to_fp16)[name = string("input_29_cast_fp16")]; + tensor q_padded_1_pad_0 = const()[name = string("q_padded_1_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_1_mode_0 = const()[name = string("q_padded_1_mode_0"), val = string("constant")]; + fp16 const_5_to_fp16 = const()[name = string("const_5_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_1_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = q_padded_1_mode_0, pad = q_padded_1_pad_0, x = input_27_cast_fp16)[name = string("q_padded_1_cast_fp16")]; + tensor var_399 = const()[name = string("op_399"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_1_cast_fp16 = reshape(shape = var_399, x = q_padded_1_cast_fp16)[name = string("q_blocks_1_cast_fp16")]; + tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; + fp16 const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_1_cast_fp16 = pad(constant_val = const_6_to_fp16, mode = 
k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_29_cast_fp16)[name = string("k_padded_1_cast_fp16")]; + tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; + fp16 const_7_to_fp16 = const()[name = string("const_7_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_1_cast_fp16 = pad(constant_val = const_7_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = input_31_cast_fp16)[name = string("v_padded_1_cast_fp16")]; + tensor var_406_begin_0 = const()[name = string("op_406_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_406_end_0 = const()[name = string("op_406_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_406_end_mask_0 = const()[name = string("op_406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_406_cast_fp16 = slice_by_index(begin = var_406_begin_0, end = var_406_end_0, end_mask = var_406_end_mask_0, x = k_padded_1_cast_fp16)[name = string("op_406_cast_fp16")]; + tensor var_408_begin_0 = const()[name = string("op_408_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_408_end_0 = const()[name = string("op_408_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_408_end_mask_0 = const()[name = string("op_408_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_padded_1_cast_fp16)[name = string("op_408_cast_fp16")]; + tensor var_410_begin_0 = const()[name = string("op_410_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_410_end_0 = const()[name = string("op_410_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_410_end_mask_0 = const()[name = string("op_410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_410_cast_fp16 = slice_by_index(begin = var_410_begin_0, end = var_410_end_0, end_mask = 
var_410_end_mask_0, x = k_padded_1_cast_fp16)[name = string("op_410_cast_fp16")]; + tensor var_412_begin_0 = const()[name = string("op_412_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_412_end_0 = const()[name = string("op_412_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_412_end_mask_0 = const()[name = string("op_412_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_padded_1_cast_fp16)[name = string("op_412_cast_fp16")]; + tensor var_414_begin_0 = const()[name = string("op_414_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_414_end_0 = const()[name = string("op_414_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_414_end_mask_0 = const()[name = string("op_414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_414_cast_fp16 = slice_by_index(begin = var_414_begin_0, end = var_414_end_0, end_mask = var_414_end_mask_0, x = k_padded_1_cast_fp16)[name = string("op_414_cast_fp16")]; + int32 k_blocks_1_axis_0 = const()[name = string("k_blocks_1_axis_0"), val = int32(1)]; + tensor k_blocks_1_cast_fp16 = stack(axis = k_blocks_1_axis_0, values = (var_406_cast_fp16, var_408_cast_fp16, var_410_cast_fp16, var_412_cast_fp16, var_414_cast_fp16))[name = string("k_blocks_1_cast_fp16")]; + tensor var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_418_end_0 = const()[name = string("op_418_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = var_418_end_0, end_mask = var_418_end_mask_0, x = v_padded_1_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor var_420_begin_0 = const()[name = string("op_420_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_420_end_0 = const()[name = 
string("op_420_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_420_end_mask_0 = const()[name = string("op_420_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = v_padded_1_cast_fp16)[name = string("op_420_cast_fp16")]; + tensor var_422_begin_0 = const()[name = string("op_422_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_422_end_0 = const()[name = string("op_422_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_422_end_mask_0 = const()[name = string("op_422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_422_cast_fp16 = slice_by_index(begin = var_422_begin_0, end = var_422_end_0, end_mask = var_422_end_mask_0, x = v_padded_1_cast_fp16)[name = string("op_422_cast_fp16")]; + tensor var_424_begin_0 = const()[name = string("op_424_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_424_end_0 = const()[name = string("op_424_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_424_end_mask_0 = const()[name = string("op_424_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = v_padded_1_cast_fp16)[name = string("op_424_cast_fp16")]; + tensor var_426_begin_0 = const()[name = string("op_426_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_426_end_0 = const()[name = string("op_426_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_426_end_mask_0 = const()[name = string("op_426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_426_cast_fp16 = slice_by_index(begin = var_426_begin_0, end = var_426_end_0, end_mask = var_426_end_mask_0, x = v_padded_1_cast_fp16)[name = string("op_426_cast_fp16")]; + int32 v_blocks_1_axis_0 = const()[name = string("v_blocks_1_axis_0"), val = int32(1)]; + tensor v_blocks_1_cast_fp16 = stack(axis = v_blocks_1_axis_0, values = 
(var_418_cast_fp16, var_420_cast_fp16, var_422_cast_fp16, var_424_cast_fp16, var_426_cast_fp16))[name = string("v_blocks_1_cast_fp16")]; + tensor var_434 = const()[name = string("op_434"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_436 = const()[name = string("op_436"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_1_transpose_x_0 = const()[name = string("matrix_ac_1_transpose_x_0"), val = bool(false)]; + bool matrix_ac_1_transpose_y_0 = const()[name = string("matrix_ac_1_transpose_y_0"), val = bool(false)]; + tensor queries_1_cast_fp16 = transpose(perm = var_434, x = q_blocks_1_cast_fp16)[name = string("transpose_70")]; + tensor keys_t_1_cast_fp16 = transpose(perm = var_436, x = k_blocks_1_cast_fp16)[name = string("transpose_71")]; + tensor matrix_ac_1_cast_fp16 = matmul(transpose_x = matrix_ac_1_transpose_x_0, transpose_y = matrix_ac_1_transpose_y_0, x = queries_1_cast_fp16, y = keys_t_1_cast_fp16)[name = string("matrix_ac_1_cast_fp16")]; + tensor var_439 = const()[name = string("op_439"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_1_cast_fp16 = reshape(shape = var_439, x = queries_1_cast_fp16)[name = string("q_flat_1_cast_fp16")]; + bool matrix_bd_1_transpose_x_0 = const()[name = string("matrix_bd_1_transpose_x_0"), val = bool(false)]; + bool matrix_bd_1_transpose_y_0 = const()[name = string("matrix_bd_1_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_1_to_fp16 = const()[name = string("rel_k_t_1_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6339968)))]; + tensor matrix_bd_1_cast_fp16 = matmul(transpose_x = matrix_bd_1_transpose_x_0, transpose_y = matrix_bd_1_transpose_y_0, x = q_flat_1_cast_fp16, y = rel_k_t_1_to_fp16)[name = string("matrix_bd_1_cast_fp16")]; + tensor var_444 = const()[name = string("op_444"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_35_cast_fp16 = reshape(shape = var_444, x = matrix_bd_1_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor matrix_bd_3_pad_0 = 
const()[name = string("matrix_bd_3_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6366656)))]; + string matrix_bd_3_mode_0 = const()[name = string("matrix_bd_3_mode_0"), val = string("constant")]; + fp16 const_9_to_fp16 = const()[name = string("const_9_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_3_cast_fp16 = pad(constant_val = const_9_to_fp16, mode = matrix_bd_3_mode_0, pad = matrix_bd_3_pad_0, x = input_35_cast_fp16)[name = string("matrix_bd_3_cast_fp16")]; + tensor var_448 = const()[name = string("op_448"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_5_cast_fp16 = reshape(shape = var_448, x = matrix_bd_3_cast_fp16)[name = string("matrix_bd_5_cast_fp16")]; + tensor matrix_bd_7_begin_0 = const()[name = string("matrix_bd_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_7_end_0 = const()[name = string("matrix_bd_7_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_7_end_mask_0 = const()[name = string("matrix_bd_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_7_cast_fp16 = slice_by_index(begin = matrix_bd_7_begin_0, end = matrix_bd_7_end_0, end_mask = matrix_bd_7_end_mask_0, x = matrix_bd_5_cast_fp16)[name = string("matrix_bd_7_cast_fp16")]; + tensor var_454 = const()[name = string("op_454"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_9_cast_fp16 = reshape(shape = var_454, x = matrix_bd_7_cast_fp16)[name = string("matrix_bd_9_cast_fp16")]; + tensor attn_1_cast_fp16 = add(x = matrix_ac_1_cast_fp16, y = matrix_bd_9_cast_fp16)[name = string("attn_1_cast_fp16")]; + fp16 _inversed_457_y_0_to_fp16 = const()[name = string("_inversed_457_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_457_cast_fp16 = mul(x = attn_1_cast_fp16, y = _inversed_457_y_0_to_fp16)[name = string("_inversed_457_cast_fp16")]; + string _inversed_457_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_457_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor 
_inversed_457_cast_fp16_to_fp32 = cast(dtype = _inversed_457_cast_fp16_to_fp32_dtype_0, x = _inversed_457_cast_fp16)[name = string("cast_525")]; + tensor var_458 = tanh(x = _inversed_457_cast_fp16_to_fp32)[name = string("op_458")]; + string var_458_to_fp16_dtype_0 = const()[name = string("op_458_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_0_softcap_to_fp16 = const()[name = string("self_attns_0_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_458_to_fp16 = cast(dtype = var_458_to_fp16_dtype_0, x = var_458)[name = string("cast_524")]; + tensor attn_3_cast_fp16 = mul(x = var_458_to_fp16, y = self_attns_0_softcap_to_fp16)[name = string("attn_3_cast_fp16")]; + string attn_3_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_3_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor var_460 = const()[name = string("op_460"), val = tensor([[[[[true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, 
true, false, false, false, false, false, false, false, false, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false]], [[true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, true], [true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true], [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true], [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true], [true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true], [true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true], [true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true], [true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true], [true, true, true, 
true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true], [true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true], [true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false]], [[true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, true], [true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true], [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true], [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true], [true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true], [true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true], [true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true], [true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true], [true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, 
false, false, false, true, true, true], [true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true], [true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false]], [[true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, true], [true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true], [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true], [true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true], [true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true], [true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true], [true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true], [true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true], [true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true], [true, true, true, true, true, true, true, true, true, true, 
false, false, false, false, false, false, false, false, false, false, false, false, true, true], [true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true], [true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false]], [[true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, true], [true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, 
true, true, true, true, true, true, true, true], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true]]]]])]; + tensor attn_3_cast_fp16_to_fp32 = cast(dtype = attn_3_cast_fp16_to_fp32_dtype_0, x = attn_3_cast_fp16)[name = string("cast_523")]; + tensor input_37 = select(a = var_321, b = attn_3_cast_fp16_to_fp32, cond = var_460)[name = string("input_37")]; + tensor var_462 = softmax(axis = var_320, x = input_37)[name = string("op_462")]; + tensor var_464 = const()[name = string("op_464"), val = tensor([0, 3, 1, -3, -1])]; + bool out_1_transpose_x_0 = const()[name = string("out_1_transpose_x_0"), val = bool(false)]; + bool out_1_transpose_y_0 = const()[name = string("out_1_transpose_y_0"), val = bool(false)]; + string var_462_to_fp16_dtype_0 = const()[name = string("op_462_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_1_cast_fp16 = transpose(perm = var_464, x = v_blocks_1_cast_fp16)[name = string("transpose_69")]; + tensor var_462_to_fp16 = cast(dtype = var_462_to_fp16_dtype_0, x = var_462)[name = string("cast_522")]; + tensor out_1_cast_fp16 = matmul(transpose_x = out_1_transpose_x_0, transpose_y = out_1_transpose_y_0, x = var_462_to_fp16, y = values_t_1_cast_fp16)[name = string("out_1_cast_fp16")]; + tensor var_467 = const()[name = string("op_467"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_469 = const()[name = string("op_469"), val = tensor([1, 60, 1024])]; + tensor var_468_cast_fp16 = transpose(perm = var_467, x = out_1_cast_fp16)[name = string("transpose_68")]; + tensor out_3_cast_fp16 = reshape(shape = var_469, x = var_468_cast_fp16)[name = string("out_3_cast_fp16")]; + tensor var_472_begin_0 = const()[name = string("op_472_begin_0"), val = tensor([0, 0, 0])]; + tensor var_472_end_0 = const()[name = string("op_472_end_0"), val = tensor([1, 50, 1024])]; + tensor var_472_end_mask_0 = const()[name = string("op_472_end_mask_0"), val = tensor([true, false, 
true])]; + tensor var_472_cast_fp16 = slice_by_index(begin = var_472_begin_0, end = var_472_end_0, end_mask = var_472_end_mask_0, x = out_3_cast_fp16)[name = string("op_472_cast_fp16")]; + fp16 self_attns_0_post_input_min_to_fp16 = const()[name = string("self_attns_0_post_input_min_to_fp16"), val = fp16(-0x1.aap+4)]; + fp16 self_attns_0_post_input_max_to_fp16 = const()[name = string("self_attns_0_post_input_max_to_fp16"), val = fp16(0x1.a8p+4)]; + tensor clip_13_cast_fp16 = clip(alpha = self_attns_0_post_input_min_to_fp16, beta = self_attns_0_post_input_max_to_fp16, x = var_472_cast_fp16)[name = string("clip_13_cast_fp16")]; + tensor self_attns_0_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6366784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6891136))))[name = string("self_attns_0_post_linear_weight_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_0_post_linear_weight_to_fp16_palettized, x = clip_13_cast_fp16)[name = string("linear_7_cast_fp16")]; + fp16 self_attns_0_post_output_min_to_fp16 = const()[name = string("self_attns_0_post_output_min_to_fp16"), val = fp16(-0x1.96p+6)]; + fp16 self_attns_0_post_output_max_to_fp16 = const()[name = string("self_attns_0_post_output_max_to_fp16"), val = fp16(0x1.92p+6)]; + tensor clip_14_cast_fp16 = clip(alpha = self_attns_0_post_output_min_to_fp16, beta = self_attns_0_post_output_max_to_fp16, x = linear_7_cast_fp16)[name = string("clip_14_cast_fp16")]; + fp16 var_484_to_fp16 = const()[name = string("op_484_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_485_to_fp16 = const()[name = string("op_485_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_15_cast_fp16 = clip(alpha = var_484_to_fp16, beta = var_485_to_fp16, x = clip_14_cast_fp16)[name = string("clip_15_cast_fp16")]; + string 
clip_15_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_15_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_487 = const()[name = string("op_487"), val = fp32(-0x1p-1)]; + fp32 var_491_promoted = const()[name = string("op_491_promoted"), val = fp32(0x1p+1)]; + tensor clip_15_cast_fp16_to_fp32 = cast(dtype = clip_15_cast_fp16_to_fp32_dtype_0, x = clip_15_cast_fp16)[name = string("cast_521")]; + tensor var_497 = pow(x = clip_15_cast_fp16_to_fp32, y = var_491_promoted)[name = string("op_497")]; + tensor var_499_axes_0 = const()[name = string("op_499_axes_0"), val = tensor([-1])]; + bool var_499_keep_dims_0 = const()[name = string("op_499_keep_dims_0"), val = bool(true)]; + tensor var_499 = reduce_mean(axes = var_499_axes_0, keep_dims = var_499_keep_dims_0, x = var_497)[name = string("op_499")]; + string var_499_to_fp16_dtype_0 = const()[name = string("op_499_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_500_to_fp16 = const()[name = string("op_500_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_499_to_fp16 = cast(dtype = var_499_to_fp16_dtype_0, x = var_499)[name = string("cast_520")]; + tensor mean_squared_7_cast_fp16 = add(x = var_499_to_fp16, y = var_500_to_fp16)[name = string("mean_squared_7_cast_fp16")]; + string mean_squared_7_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_7_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_7_cast_fp16_to_fp32 = cast(dtype = mean_squared_7_cast_fp16_to_fp32_dtype_0, x = mean_squared_7_cast_fp16)[name = string("cast_519")]; + tensor var_502 = pow(x = mean_squared_7_cast_fp16_to_fp32, y = var_487)[name = string("op_502")]; + string var_502_to_fp16_dtype_0 = const()[name = string("op_502_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_502_to_fp16 = cast(dtype = var_502_to_fp16_dtype_0, x = var_502)[name = string("cast_518")]; + tensor normed_output_13_cast_fp16 = mul(x = clip_15_cast_fp16, y = var_502_to_fp16)[name = string("normed_output_13_cast_fp16")]; + 
tensor const_10_to_fp16 = const()[name = string("const_10_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6892224)))]; + tensor normed_output_15_cast_fp16 = mul(x = normed_output_13_cast_fp16, y = const_10_to_fp16)[name = string("normed_output_15_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = normed_output_15_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + string hidden_states_61_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_61_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_509 = const()[name = string("op_509"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_510 = const()[name = string("op_510"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_522 = const()[name = string("op_522"), val = fp32(-0x1p-1)]; + fp32 var_518_promoted = const()[name = string("op_518_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_61_cast_fp16_to_fp32 = cast(dtype = hidden_states_61_cast_fp16_to_fp32_dtype_0, x = hidden_states_61_cast_fp16)[name = string("cast_517")]; + tensor var_530 = pow(x = hidden_states_61_cast_fp16_to_fp32, y = var_518_promoted)[name = string("op_530")]; + tensor var_532_axes_0 = const()[name = string("op_532_axes_0"), val = tensor([-1])]; + bool var_532_keep_dims_0 = const()[name = string("op_532_keep_dims_0"), val = bool(true)]; + tensor var_532 = reduce_mean(axes = var_532_axes_0, keep_dims = var_532_keep_dims_0, x = var_530)[name = string("op_532")]; + string var_532_to_fp16_dtype_0 = const()[name = string("op_532_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_533_to_fp16 = const()[name = string("op_533_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_532_to_fp16 = cast(dtype = var_532_to_fp16_dtype_0, x = var_532)[name = string("cast_516")]; + tensor mean_squared_9_cast_fp16 = add(x = var_532_to_fp16, y = var_533_to_fp16)[name = string("mean_squared_9_cast_fp16")]; + string mean_squared_9_cast_fp16_to_fp32_dtype_0 = 
const()[name = string("mean_squared_9_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_9_cast_fp16_to_fp32 = cast(dtype = mean_squared_9_cast_fp16_to_fp32_dtype_0, x = mean_squared_9_cast_fp16)[name = string("cast_515")]; + tensor var_535 = pow(x = mean_squared_9_cast_fp16_to_fp32, y = var_522)[name = string("op_535")]; + string var_535_to_fp16_dtype_0 = const()[name = string("op_535_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_535_to_fp16 = cast(dtype = var_535_to_fp16_dtype_0, x = var_535)[name = string("cast_514")]; + tensor normed_output_17_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = var_535_to_fp16)[name = string("normed_output_17_cast_fp16")]; + tensor const_11_to_fp16 = const()[name = string("const_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6894336)))]; + tensor normed_output_19_cast_fp16 = mul(x = normed_output_17_cast_fp16, y = const_11_to_fp16)[name = string("normed_output_19_cast_fp16")]; + fp16 lconv1ds_0_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_0_linear_start_input_min_to_fp16"), val = fp16(-0x1.04p+5)]; + fp16 lconv1ds_0_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_0_linear_start_input_max_to_fp16"), val = fp16(0x1.02p+5)]; + tensor clip_16_cast_fp16 = clip(alpha = lconv1ds_0_linear_start_input_min_to_fp16, beta = lconv1ds_0_linear_start_input_max_to_fp16, x = normed_output_19_cast_fp16)[name = string("clip_16_cast_fp16")]; + tensor lconv1ds_0_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6896448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7945088))))[name = string("lconv1ds_0_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_8_bias_0_to_fp16 = const()[name = string("linear_8_bias_0_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(7947200)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_0_linear_start_linear_weight_to_fp16_palettized, x = clip_16_cast_fp16)[name = string("linear_8_cast_fp16")]; + fp16 lconv1ds_0_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_0_linear_start_output_min_to_fp16"), val = fp16(-0x1.dap+4)]; + fp16 lconv1ds_0_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_0_linear_start_output_max_to_fp16"), val = fp16(0x1.d6p+4)]; + tensor clip_17_cast_fp16 = clip(alpha = lconv1ds_0_linear_start_output_min_to_fp16, beta = lconv1ds_0_linear_start_output_max_to_fp16, x = linear_8_cast_fp16)[name = string("clip_17_cast_fp16")]; + int32 hidden_states_69_split_num_splits_0 = const()[name = string("hidden_states_69_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_69_split_axis_0 = const()[name = string("hidden_states_69_split_axis_0"), val = int32(-1)]; + tensor hidden_states_69_split_cast_fp16_0, tensor hidden_states_69_split_cast_fp16_1 = split(axis = hidden_states_69_split_axis_0, num_splits = hidden_states_69_split_num_splits_0, x = clip_17_cast_fp16)[name = string("hidden_states_69_split_cast_fp16")]; + tensor hidden_states_69_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_69_split_cast_fp16_1)[name = string("hidden_states_69_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = mul(x = hidden_states_69_split_cast_fp16_0, y = hidden_states_69_split_1_sigmoid_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor input_45_perm_0 = const()[name = string("input_45_perm_0"), val = tensor([0, 2, 1])]; + tensor input_47_pad_0 = const()[name = string("input_47_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_47_mode_0 = const()[name = string("input_47_mode_0"), val = string("constant")]; + fp16 const_12_to_fp16 = const()[name = string("const_12_to_fp16"), val = fp16(0x0p+0)]; + tensor 
input_45_cast_fp16 = transpose(perm = input_45_perm_0, x = hidden_states_69_cast_fp16)[name = string("transpose_67")]; + tensor input_47_cast_fp16 = pad(constant_val = const_12_to_fp16, mode = input_47_mode_0, pad = input_47_pad_0, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")]; + string var_561_pad_type_0 = const()[name = string("op_561_pad_type_0"), val = string("valid")]; + int32 var_561_groups_0 = const()[name = string("op_561_groups_0"), val = int32(1024)]; + tensor var_561_strides_0 = const()[name = string("op_561_strides_0"), val = tensor([1])]; + tensor var_561_pad_0 = const()[name = string("op_561_pad_0"), val = tensor([0, 0])]; + tensor var_561_dilations_0 = const()[name = string("op_561_dilations_0"), val = tensor([1])]; + tensor lconv1ds_0_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7951360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7953984))))[name = string("lconv1ds_0_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_561_cast_fp16 = conv(dilations = var_561_dilations_0, groups = var_561_groups_0, pad = var_561_pad_0, pad_type = var_561_pad_type_0, strides = var_561_strides_0, weight = lconv1ds_0_depthwise_conv1d_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("op_561_cast_fp16")]; + tensor hidden_states_71_perm_0 = const()[name = string("hidden_states_71_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_71_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_71_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_71_cast_fp16 = transpose(perm = hidden_states_71_perm_0, x = var_561_cast_fp16)[name = string("transpose_66")]; + tensor hidden_states_71_cast_fp16_to_fp32 = cast(dtype = hidden_states_71_cast_fp16_to_fp32_dtype_0, x = hidden_states_71_cast_fp16)[name = string("cast_513")]; + tensor clip_18 = 
clip(alpha = var_510, beta = var_509, x = hidden_states_71_cast_fp16_to_fp32)[name = string("clip_18")]; + fp32 var_518_promoted_1 = const()[name = string("op_518_promoted_1"), val = fp32(0x1p+1)]; + tensor var_566 = pow(x = clip_18, y = var_518_promoted_1)[name = string("op_566")]; + tensor var_568_axes_0 = const()[name = string("op_568_axes_0"), val = tensor([-1])]; + bool var_568_keep_dims_0 = const()[name = string("op_568_keep_dims_0"), val = bool(true)]; + tensor var_568 = reduce_mean(axes = var_568_axes_0, keep_dims = var_568_keep_dims_0, x = var_566)[name = string("op_568")]; + string var_568_to_fp16_dtype_0 = const()[name = string("op_568_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_569_to_fp16 = const()[name = string("op_569_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_568_to_fp16 = cast(dtype = var_568_to_fp16_dtype_0, x = var_568)[name = string("cast_512")]; + tensor mean_squared_11_cast_fp16 = add(x = var_568_to_fp16, y = var_569_to_fp16)[name = string("mean_squared_11_cast_fp16")]; + string mean_squared_11_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_11_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_11_cast_fp16_to_fp32 = cast(dtype = mean_squared_11_cast_fp16_to_fp32_dtype_0, x = mean_squared_11_cast_fp16)[name = string("cast_511")]; + tensor var_571 = pow(x = mean_squared_11_cast_fp16_to_fp32, y = var_522)[name = string("op_571")]; + string clip_18_to_fp16_dtype_0 = const()[name = string("clip_18_to_fp16_dtype_0"), val = string("fp16")]; + string var_571_to_fp16_dtype_0 = const()[name = string("op_571_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_18_to_fp16 = cast(dtype = clip_18_to_fp16_dtype_0, x = clip_18)[name = string("cast_509")]; + tensor var_571_to_fp16 = cast(dtype = var_571_to_fp16_dtype_0, x = var_571)[name = string("cast_510")]; + tensor normed_output_21_cast_fp16 = mul(x = clip_18_to_fp16, y = var_571_to_fp16)[name = string("normed_output_21_cast_fp16")]; + tensor 
const_13_to_fp16 = const()[name = string("const_13_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7955072)))]; + tensor normed_output_23_cast_fp16 = mul(x = normed_output_21_cast_fp16, y = const_13_to_fp16)[name = string("normed_output_23_cast_fp16")]; + tensor hidden_states_77_cast_fp16 = silu(x = normed_output_23_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; + fp16 lconv1ds_0_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_0_linear_end_input_min_to_fp16"), val = fp16(-0x1.74p+2)]; + fp16 lconv1ds_0_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_0_linear_end_input_max_to_fp16"), val = fp16(0x1.72p+2)]; + tensor clip_19_cast_fp16 = clip(alpha = lconv1ds_0_linear_end_input_min_to_fp16, beta = lconv1ds_0_linear_end_input_max_to_fp16, x = hidden_states_77_cast_fp16)[name = string("clip_19_cast_fp16")]; + tensor lconv1ds_0_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7957184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8481536))))[name = string("lconv1ds_0_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_9_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_0_linear_end_linear_weight_to_fp16_palettized, x = clip_19_cast_fp16)[name = string("linear_9_cast_fp16")]; + fp16 lconv1ds_0_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_0_linear_end_output_min_to_fp16"), val = fp16(-0x1.9p+2)]; + fp16 lconv1ds_0_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_0_linear_end_output_max_to_fp16"), val = fp16(0x1.8cp+2)]; + tensor clip_20_cast_fp16 = clip(alpha = lconv1ds_0_linear_end_output_min_to_fp16, beta = lconv1ds_0_linear_end_output_max_to_fp16, x = linear_9_cast_fp16)[name = string("clip_20_cast_fp16")]; + tensor hidden_states_83_cast_fp16 = add(x = 
clip_20_cast_fp16, y = hidden_states_61_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; + string hidden_states_83_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_83_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_595 = const()[name = string("op_595"), val = fp32(-0x1p-1)]; + fp32 var_596 = const()[name = string("op_596"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_597 = const()[name = string("op_597"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_83_cast_fp16_to_fp32 = cast(dtype = hidden_states_83_cast_fp16_to_fp32_dtype_0, x = hidden_states_83_cast_fp16)[name = string("cast_508")]; + tensor clip_21 = clip(alpha = var_597, beta = var_596, x = hidden_states_83_cast_fp16_to_fp32)[name = string("clip_21")]; + fp32 var_591_promoted = const()[name = string("op_591_promoted"), val = fp32(0x1p+1)]; + tensor var_605 = pow(x = clip_21, y = var_591_promoted)[name = string("op_605")]; + tensor var_607_axes_0 = const()[name = string("op_607_axes_0"), val = tensor([-1])]; + bool var_607_keep_dims_0 = const()[name = string("op_607_keep_dims_0"), val = bool(true)]; + tensor var_607 = reduce_mean(axes = var_607_axes_0, keep_dims = var_607_keep_dims_0, x = var_605)[name = string("op_607")]; + string var_607_to_fp16_dtype_0 = const()[name = string("op_607_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_607_to_fp16 = cast(dtype = var_607_to_fp16_dtype_0, x = var_607)[name = string("cast_507")]; + tensor mean_squared_13_cast_fp16 = add(x = var_607_to_fp16, y = var_608_to_fp16)[name = string("mean_squared_13_cast_fp16")]; + string mean_squared_13_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_13_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_13_cast_fp16_to_fp32 = cast(dtype = mean_squared_13_cast_fp16_to_fp32_dtype_0, x = mean_squared_13_cast_fp16)[name = string("cast_506")]; + tensor var_610 = pow(x 
= mean_squared_13_cast_fp16_to_fp32, y = var_595)[name = string("op_610")]; + string clip_21_to_fp16_dtype_0 = const()[name = string("clip_21_to_fp16_dtype_0"), val = string("fp16")]; + string var_610_to_fp16_dtype_0 = const()[name = string("op_610_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_21_to_fp16 = cast(dtype = clip_21_to_fp16_dtype_0, x = clip_21)[name = string("cast_504")]; + tensor var_610_to_fp16 = cast(dtype = var_610_to_fp16_dtype_0, x = var_610)[name = string("cast_505")]; + tensor normed_output_25_cast_fp16 = mul(x = clip_21_to_fp16, y = var_610_to_fp16)[name = string("normed_output_25_cast_fp16")]; + tensor const_14_to_fp16 = const()[name = string("const_14_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8482624)))]; + tensor normed_output_27_cast_fp16 = mul(x = normed_output_25_cast_fp16, y = const_14_to_fp16)[name = string("normed_output_27_cast_fp16")]; + fp16 feed_forward2s_0_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.66p+3)]; + fp16 feed_forward2s_0_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.62p+3)]; + tensor clip_22_cast_fp16 = clip(alpha = feed_forward2s_0_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_0_ffw_layer_1_input_max_to_fp16, x = normed_output_27_cast_fp16)[name = string("clip_22_cast_fp16")]; + tensor feed_forward2s_0_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8484736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10581952))))[name = string("feed_forward2s_0_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_0_ffw_layer_1_linear_weight_to_fp16_palettized, x = 
clip_22_cast_fp16)[name = string("linear_10_cast_fp16")]; + fp16 feed_forward2s_0_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.a6p+4)]; + fp16 feed_forward2s_0_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.a2p+4)]; + tensor clip_23_cast_fp16 = clip(alpha = feed_forward2s_0_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_0_ffw_layer_1_output_max_to_fp16, x = linear_10_cast_fp16)[name = string("clip_23_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = silu(x = clip_23_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; + fp16 feed_forward2s_0_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.2ep+3)]; + fp16 feed_forward2s_0_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.2cp+3)]; + tensor clip_24_cast_fp16 = clip(alpha = feed_forward2s_0_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_0_ffw_layer_2_input_max_to_fp16, x = hidden_states_93_cast_fp16)[name = string("clip_24_cast_fp16")]; + tensor feed_forward2s_0_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10586112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12683328))))[name = string("feed_forward2s_0_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_11_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_0_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_24_cast_fp16)[name = string("linear_11_cast_fp16")]; + fp16 feed_forward2s_0_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.0ap+5)]; + fp16 
feed_forward2s_0_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_0_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.08p+5)]; + tensor clip_25_cast_fp16 = clip(alpha = feed_forward2s_0_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_0_ffw_layer_2_output_max_to_fp16, x = linear_11_cast_fp16)[name = string("clip_25_cast_fp16")]; + string clip_25_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_25_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_25_cast_fp16_to_fp32 = cast(dtype = clip_25_cast_fp16_to_fp32_dtype_0, x = clip_25_cast_fp16)[name = string("cast_503")]; + tensor clip_26 = clip(alpha = var_597, beta = var_596, x = clip_25_cast_fp16_to_fp32)[name = string("clip_26")]; + fp32 var_591_promoted_1 = const()[name = string("op_591_promoted_1"), val = fp32(0x1p+1)]; + tensor var_637 = pow(x = clip_26, y = var_591_promoted_1)[name = string("op_637")]; + tensor var_639_axes_0 = const()[name = string("op_639_axes_0"), val = tensor([-1])]; + bool var_639_keep_dims_0 = const()[name = string("op_639_keep_dims_0"), val = bool(true)]; + tensor var_639 = reduce_mean(axes = var_639_axes_0, keep_dims = var_639_keep_dims_0, x = var_637)[name = string("op_639")]; + string var_639_to_fp16_dtype_0 = const()[name = string("op_639_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_639_to_fp16 = cast(dtype = var_639_to_fp16_dtype_0, x = var_639)[name = string("cast_502")]; + tensor mean_squared_15_cast_fp16 = add(x = var_639_to_fp16, y = var_640_to_fp16)[name = string("mean_squared_15_cast_fp16")]; + string mean_squared_15_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_15_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_15_cast_fp16_to_fp32 = cast(dtype = mean_squared_15_cast_fp16_to_fp32_dtype_0, x = mean_squared_15_cast_fp16)[name = string("cast_501")]; + tensor var_642 = pow(x = 
mean_squared_15_cast_fp16_to_fp32, y = var_595)[name = string("op_642")]; + string clip_26_to_fp16_dtype_0 = const()[name = string("clip_26_to_fp16_dtype_0"), val = string("fp16")]; + string var_642_to_fp16_dtype_0 = const()[name = string("op_642_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_26_to_fp16 = cast(dtype = clip_26_to_fp16_dtype_0, x = clip_26)[name = string("cast_499")]; + tensor var_642_to_fp16 = cast(dtype = var_642_to_fp16_dtype_0, x = var_642)[name = string("cast_500")]; + tensor normed_output_29_cast_fp16 = mul(x = clip_26_to_fp16, y = var_642_to_fp16)[name = string("normed_output_29_cast_fp16")]; + tensor const_15_to_fp16 = const()[name = string("const_15_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12684416)))]; + tensor normed_output_31_cast_fp16 = mul(x = normed_output_29_cast_fp16, y = const_15_to_fp16)[name = string("normed_output_31_cast_fp16")]; + fp16 var_587_to_fp16 = const()[name = string("op_587_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_105_cast_fp16 = mul(x = normed_output_31_cast_fp16, y = var_587_to_fp16)[name = string("hidden_states_105_cast_fp16")]; + tensor hidden_states_107_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = hidden_states_83_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; + fp16 var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_27_cast_fp16 = clip(alpha = var_649_to_fp16, beta = var_650_to_fp16, x = hidden_states_107_cast_fp16)[name = string("clip_27_cast_fp16")]; + string clip_27_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_27_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_652 = const()[name = string("op_652"), val = fp32(-0x1p-1)]; + fp32 var_656_promoted = const()[name = string("op_656_promoted"), val = fp32(0x1p+1)]; + tensor clip_27_cast_fp16_to_fp32 = 
cast(dtype = clip_27_cast_fp16_to_fp32_dtype_0, x = clip_27_cast_fp16)[name = string("cast_498")]; + tensor var_662 = pow(x = clip_27_cast_fp16_to_fp32, y = var_656_promoted)[name = string("op_662")]; + tensor var_664_axes_0 = const()[name = string("op_664_axes_0"), val = tensor([-1])]; + bool var_664_keep_dims_0 = const()[name = string("op_664_keep_dims_0"), val = bool(true)]; + tensor var_664 = reduce_mean(axes = var_664_axes_0, keep_dims = var_664_keep_dims_0, x = var_662)[name = string("op_664")]; + string var_664_to_fp16_dtype_0 = const()[name = string("op_664_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_665_to_fp16 = const()[name = string("op_665_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_664_to_fp16 = cast(dtype = var_664_to_fp16_dtype_0, x = var_664)[name = string("cast_497")]; + tensor mean_squared_17_cast_fp16 = add(x = var_664_to_fp16, y = var_665_to_fp16)[name = string("mean_squared_17_cast_fp16")]; + string mean_squared_17_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_17_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_17_cast_fp16_to_fp32 = cast(dtype = mean_squared_17_cast_fp16_to_fp32_dtype_0, x = mean_squared_17_cast_fp16)[name = string("cast_496")]; + tensor var_667 = pow(x = mean_squared_17_cast_fp16_to_fp32, y = var_652)[name = string("op_667")]; + string var_667_to_fp16_dtype_0 = const()[name = string("op_667_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_667_to_fp16 = cast(dtype = var_667_to_fp16_dtype_0, x = var_667)[name = string("cast_495")]; + tensor normed_output_33_cast_fp16 = mul(x = clip_27_cast_fp16, y = var_667_to_fp16)[name = string("normed_output_33_cast_fp16")]; + tensor const_16_to_fp16 = const()[name = string("const_16_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12686528)))]; + tensor normed_output_35_cast_fp16 = mul(x = normed_output_33_cast_fp16, y = const_16_to_fp16)[name = 
string("normed_output_35_cast_fp16")]; + string normed_output_35_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_35_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_680 = const()[name = string("op_680"), val = fp32(-0x1p-1)]; + fp32 var_681 = const()[name = string("op_681"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_682 = const()[name = string("op_682"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_35_cast_fp16_to_fp32 = cast(dtype = normed_output_35_cast_fp16_to_fp32_dtype_0, x = normed_output_35_cast_fp16)[name = string("cast_494")]; + tensor clip_28 = clip(alpha = var_682, beta = var_681, x = normed_output_35_cast_fp16_to_fp32)[name = string("clip_28")]; + fp32 var_676_promoted = const()[name = string("op_676_promoted"), val = fp32(0x1p+1)]; + tensor var_690 = pow(x = clip_28, y = var_676_promoted)[name = string("op_690")]; + tensor var_692_axes_0 = const()[name = string("op_692_axes_0"), val = tensor([-1])]; + bool var_692_keep_dims_0 = const()[name = string("op_692_keep_dims_0"), val = bool(true)]; + tensor var_692 = reduce_mean(axes = var_692_axes_0, keep_dims = var_692_keep_dims_0, x = var_690)[name = string("op_692")]; + string var_692_to_fp16_dtype_0 = const()[name = string("op_692_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_692_to_fp16 = cast(dtype = var_692_to_fp16_dtype_0, x = var_692)[name = string("cast_493")]; + tensor mean_squared_19_cast_fp16 = add(x = var_692_to_fp16, y = var_693_to_fp16)[name = string("mean_squared_19_cast_fp16")]; + string mean_squared_19_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_19_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_19_cast_fp16_to_fp32 = cast(dtype = mean_squared_19_cast_fp16_to_fp32_dtype_0, x = mean_squared_19_cast_fp16)[name = string("cast_492")]; + tensor var_695 = pow(x = mean_squared_19_cast_fp16_to_fp32, y = var_680)[name = 
string("op_695")]; + string clip_28_to_fp16_dtype_0 = const()[name = string("clip_28_to_fp16_dtype_0"), val = string("fp16")]; + string var_695_to_fp16_dtype_0 = const()[name = string("op_695_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_28_to_fp16 = cast(dtype = clip_28_to_fp16_dtype_0, x = clip_28)[name = string("cast_490")]; + tensor var_695_to_fp16 = cast(dtype = var_695_to_fp16_dtype_0, x = var_695)[name = string("cast_491")]; + tensor normed_output_37_cast_fp16 = mul(x = clip_28_to_fp16, y = var_695_to_fp16)[name = string("normed_output_37_cast_fp16")]; + tensor const_17_to_fp16 = const()[name = string("const_17_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12688640)))]; + tensor normed_output_39_cast_fp16 = mul(x = normed_output_37_cast_fp16, y = const_17_to_fp16)[name = string("normed_output_39_cast_fp16")]; + fp16 feed_forward1s_1_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.86p+3)]; + fp16 feed_forward1s_1_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.84p+3)]; + tensor clip_29_cast_fp16 = clip(alpha = feed_forward1s_1_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_1_ffw_layer_1_input_max_to_fp16, x = normed_output_39_cast_fp16)[name = string("clip_29_cast_fp16")]; + tensor feed_forward1s_1_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12690752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14787968))))[name = string("feed_forward1s_1_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_12_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_1_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_29_cast_fp16)[name = string("linear_12_cast_fp16")]; + 
fp16 feed_forward1s_1_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.bap+4)]; + fp16 feed_forward1s_1_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.b6p+4)]; + tensor clip_30_cast_fp16 = clip(alpha = feed_forward1s_1_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_1_ffw_layer_1_output_max_to_fp16, x = linear_12_cast_fp16)[name = string("clip_30_cast_fp16")]; + tensor hidden_states_123_cast_fp16 = silu(x = clip_30_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; + fp16 feed_forward1s_1_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.36p+3)]; + fp16 feed_forward1s_1_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.34p+3)]; + tensor clip_31_cast_fp16 = clip(alpha = feed_forward1s_1_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_1_ffw_layer_2_input_max_to_fp16, x = hidden_states_123_cast_fp16)[name = string("clip_31_cast_fp16")]; + tensor feed_forward1s_1_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14792128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16889344))))[name = string("feed_forward1s_1_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_1_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + fp16 feed_forward1s_1_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_1_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.3ep+5)]; + fp16 feed_forward1s_1_ffw_layer_2_output_max_to_fp16 = const()[name = 
string("feed_forward1s_1_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.3cp+5)]; + tensor clip_32_cast_fp16 = clip(alpha = feed_forward1s_1_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_1_ffw_layer_2_output_max_to_fp16, x = linear_13_cast_fp16)[name = string("clip_32_cast_fp16")]; + string clip_32_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_32_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_32_cast_fp16_to_fp32 = cast(dtype = clip_32_cast_fp16_to_fp32_dtype_0, x = clip_32_cast_fp16)[name = string("cast_489")]; + tensor clip_33 = clip(alpha = var_682, beta = var_681, x = clip_32_cast_fp16_to_fp32)[name = string("clip_33")]; + fp32 var_676_promoted_1 = const()[name = string("op_676_promoted_1"), val = fp32(0x1p+1)]; + tensor var_722 = pow(x = clip_33, y = var_676_promoted_1)[name = string("op_722")]; + tensor var_724_axes_0 = const()[name = string("op_724_axes_0"), val = tensor([-1])]; + bool var_724_keep_dims_0 = const()[name = string("op_724_keep_dims_0"), val = bool(true)]; + tensor var_724 = reduce_mean(axes = var_724_axes_0, keep_dims = var_724_keep_dims_0, x = var_722)[name = string("op_724")]; + string var_724_to_fp16_dtype_0 = const()[name = string("op_724_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_725_to_fp16 = const()[name = string("op_725_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_724_to_fp16 = cast(dtype = var_724_to_fp16_dtype_0, x = var_724)[name = string("cast_488")]; + tensor mean_squared_21_cast_fp16 = add(x = var_724_to_fp16, y = var_725_to_fp16)[name = string("mean_squared_21_cast_fp16")]; + string mean_squared_21_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_21_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_21_cast_fp16_to_fp32 = cast(dtype = mean_squared_21_cast_fp16_to_fp32_dtype_0, x = mean_squared_21_cast_fp16)[name = string("cast_487")]; + tensor var_727 = pow(x = mean_squared_21_cast_fp16_to_fp32, y = var_680)[name = string("op_727")]; + 
string clip_33_to_fp16_dtype_0 = const()[name = string("clip_33_to_fp16_dtype_0"), val = string("fp16")]; + string var_727_to_fp16_dtype_0 = const()[name = string("op_727_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_33_to_fp16 = cast(dtype = clip_33_to_fp16_dtype_0, x = clip_33)[name = string("cast_485")]; + tensor var_727_to_fp16 = cast(dtype = var_727_to_fp16_dtype_0, x = var_727)[name = string("cast_486")]; + tensor normed_output_41_cast_fp16 = mul(x = clip_33_to_fp16, y = var_727_to_fp16)[name = string("normed_output_41_cast_fp16")]; + tensor const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16890432)))]; + tensor normed_output_43_cast_fp16 = mul(x = normed_output_41_cast_fp16, y = const_18_to_fp16)[name = string("normed_output_43_cast_fp16")]; + fp16 var_672_to_fp16 = const()[name = string("op_672_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_135_cast_fp16 = mul(x = normed_output_43_cast_fp16, y = var_672_to_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor hidden_states_137_cast_fp16 = add(x = hidden_states_135_cast_fp16, y = normed_output_35_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; + fp16 var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_34_cast_fp16 = clip(alpha = var_734_to_fp16, beta = var_735_to_fp16, x = hidden_states_137_cast_fp16)[name = string("clip_34_cast_fp16")]; + string clip_34_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_34_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_737 = const()[name = string("op_737"), val = fp32(-0x1p-1)]; + fp32 var_741_promoted = const()[name = string("op_741_promoted"), val = fp32(0x1p+1)]; + tensor clip_34_cast_fp16_to_fp32 = cast(dtype = clip_34_cast_fp16_to_fp32_dtype_0, x = clip_34_cast_fp16)[name = 
string("cast_484")]; + tensor var_747 = pow(x = clip_34_cast_fp16_to_fp32, y = var_741_promoted)[name = string("op_747")]; + tensor var_749_axes_0 = const()[name = string("op_749_axes_0"), val = tensor([-1])]; + bool var_749_keep_dims_0 = const()[name = string("op_749_keep_dims_0"), val = bool(true)]; + tensor var_749 = reduce_mean(axes = var_749_axes_0, keep_dims = var_749_keep_dims_0, x = var_747)[name = string("op_749")]; + string var_749_to_fp16_dtype_0 = const()[name = string("op_749_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_749_to_fp16 = cast(dtype = var_749_to_fp16_dtype_0, x = var_749)[name = string("cast_483")]; + tensor mean_squared_23_cast_fp16 = add(x = var_749_to_fp16, y = var_750_to_fp16)[name = string("mean_squared_23_cast_fp16")]; + string mean_squared_23_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_23_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_23_cast_fp16_to_fp32 = cast(dtype = mean_squared_23_cast_fp16_to_fp32_dtype_0, x = mean_squared_23_cast_fp16)[name = string("cast_482")]; + tensor var_752 = pow(x = mean_squared_23_cast_fp16_to_fp32, y = var_737)[name = string("op_752")]; + string var_752_to_fp16_dtype_0 = const()[name = string("op_752_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_752_to_fp16 = cast(dtype = var_752_to_fp16_dtype_0, x = var_752)[name = string("cast_481")]; + tensor normed_output_45_cast_fp16 = mul(x = clip_34_cast_fp16, y = var_752_to_fp16)[name = string("normed_output_45_cast_fp16")]; + tensor const_19_to_fp16 = const()[name = string("const_19_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16892544)))]; + tensor normed_output_47_cast_fp16 = mul(x = normed_output_45_cast_fp16, y = const_19_to_fp16)[name = string("normed_output_47_cast_fp16")]; + int32 var_758 = const()[name = string("op_758"), val = int32(-1)]; + fp32 
var_759 = const()[name = string("op_759"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_1_q_proj_input_min_to_fp16 = const()[name = string("self_attns_1_q_proj_input_min_to_fp16"), val = fp16(-0x1.6cp+3)]; + fp16 self_attns_1_q_proj_input_max_to_fp16 = const()[name = string("self_attns_1_q_proj_input_max_to_fp16"), val = fp16(0x1.6ap+3)]; + tensor clip_35_cast_fp16 = clip(alpha = self_attns_1_q_proj_input_min_to_fp16, beta = self_attns_1_q_proj_input_max_to_fp16, x = normed_output_47_cast_fp16)[name = string("clip_35_cast_fp16")]; + tensor self_attns_1_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16894656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17419008))))[name = string("self_attns_1_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_1_q_proj_linear_weight_to_fp16_palettized, x = clip_35_cast_fp16)[name = string("linear_14_cast_fp16")]; + fp16 self_attns_1_q_proj_output_min_to_fp16 = const()[name = string("self_attns_1_q_proj_output_min_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 self_attns_1_q_proj_output_max_to_fp16 = const()[name = string("self_attns_1_q_proj_output_max_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_36_cast_fp16 = clip(alpha = self_attns_1_q_proj_output_min_to_fp16, beta = self_attns_1_q_proj_output_max_to_fp16, x = linear_14_cast_fp16)[name = string("clip_36_cast_fp16")]; + tensor var_803 = const()[name = string("op_803"), val = tensor([1, 50, 8, 128])]; + tensor q_3_cast_fp16 = reshape(shape = var_803, x = clip_36_cast_fp16)[name = string("q_3_cast_fp16")]; + tensor self_attns_1_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17420096))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(17944448))))[name = string("self_attns_1_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_1_k_proj_linear_weight_to_fp16_palettized, x = clip_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + fp16 self_attns_1_k_proj_output_min_to_fp16 = const()[name = string("self_attns_1_k_proj_output_min_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 self_attns_1_k_proj_output_max_to_fp16 = const()[name = string("self_attns_1_k_proj_output_max_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_38_cast_fp16 = clip(alpha = self_attns_1_k_proj_output_min_to_fp16, beta = self_attns_1_k_proj_output_max_to_fp16, x = linear_15_cast_fp16)[name = string("clip_38_cast_fp16")]; + tensor var_815 = const()[name = string("op_815"), val = tensor([1, 50, 8, 128])]; + tensor k_3_cast_fp16 = reshape(shape = var_815, x = clip_38_cast_fp16)[name = string("k_3_cast_fp16")]; + tensor self_attns_1_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17945536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18469888))))[name = string("self_attns_1_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_1_v_proj_linear_weight_to_fp16_palettized, x = clip_35_cast_fp16)[name = string("linear_16_cast_fp16")]; + fp16 self_attns_1_v_proj_output_min_to_fp16 = const()[name = string("self_attns_1_v_proj_output_min_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 self_attns_1_v_proj_output_max_to_fp16 = const()[name = string("self_attns_1_v_proj_output_max_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_40_cast_fp16 = clip(alpha = self_attns_1_v_proj_output_min_to_fp16, beta = self_attns_1_v_proj_output_max_to_fp16, x = linear_16_cast_fp16)[name = 
string("clip_40_cast_fp16")]; + tensor var_827 = const()[name = string("op_827"), val = tensor([1, 50, 8, 128])]; + tensor input_75_cast_fp16 = reshape(shape = var_827, x = clip_40_cast_fp16)[name = string("input_75_cast_fp16")]; + fp16 var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_830_cast_fp16 = mul(x = q_3_cast_fp16, y = var_829_to_fp16)[name = string("op_830_cast_fp16")]; + tensor var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18470976)))]; + tensor input_71_cast_fp16 = mul(x = var_830_cast_fp16, y = var_831_to_fp16)[name = string("input_71_cast_fp16")]; + fp16 var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_73_cast_fp16 = mul(x = k_3_cast_fp16, y = var_833_to_fp16)[name = string("input_73_cast_fp16")]; + tensor q_padded_3_pad_0 = const()[name = string("q_padded_3_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_3_mode_0 = const()[name = string("q_padded_3_mode_0"), val = string("constant")]; + fp16 const_20_to_fp16 = const()[name = string("const_20_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_3_cast_fp16 = pad(constant_val = const_20_to_fp16, mode = q_padded_3_mode_0, pad = q_padded_3_pad_0, x = input_71_cast_fp16)[name = string("q_padded_3_cast_fp16")]; + tensor var_837 = const()[name = string("op_837"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_3_cast_fp16 = reshape(shape = var_837, x = q_padded_3_cast_fp16)[name = string("q_blocks_3_cast_fp16")]; + tensor k_padded_3_pad_0 = const()[name = string("k_padded_3_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; + fp16 const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_3_cast_fp16 = pad(constant_val = const_21_to_fp16, mode = 
k_padded_3_mode_0, pad = k_padded_3_pad_0, x = input_73_cast_fp16)[name = string("k_padded_3_cast_fp16")]; + tensor v_padded_3_pad_0 = const()[name = string("v_padded_3_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; + fp16 const_22_to_fp16 = const()[name = string("const_22_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_3_cast_fp16 = pad(constant_val = const_22_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = input_75_cast_fp16)[name = string("v_padded_3_cast_fp16")]; + tensor var_844_begin_0 = const()[name = string("op_844_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_844_end_0 = const()[name = string("op_844_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_844_end_mask_0 = const()[name = string("op_844_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_844_cast_fp16 = slice_by_index(begin = var_844_begin_0, end = var_844_end_0, end_mask = var_844_end_mask_0, x = k_padded_3_cast_fp16)[name = string("op_844_cast_fp16")]; + tensor var_846_begin_0 = const()[name = string("op_846_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_846_end_0 = const()[name = string("op_846_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_846_end_mask_0 = const()[name = string("op_846_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_846_cast_fp16 = slice_by_index(begin = var_846_begin_0, end = var_846_end_0, end_mask = var_846_end_mask_0, x = k_padded_3_cast_fp16)[name = string("op_846_cast_fp16")]; + tensor var_848_begin_0 = const()[name = string("op_848_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_848_end_0 = const()[name = string("op_848_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_848_end_mask_0 = const()[name = string("op_848_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_848_cast_fp16 = slice_by_index(begin = var_848_begin_0, end = var_848_end_0, end_mask = 
var_848_end_mask_0, x = k_padded_3_cast_fp16)[name = string("op_848_cast_fp16")]; + tensor var_850_begin_0 = const()[name = string("op_850_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_850_end_0 = const()[name = string("op_850_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_850_end_mask_0 = const()[name = string("op_850_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_850_cast_fp16 = slice_by_index(begin = var_850_begin_0, end = var_850_end_0, end_mask = var_850_end_mask_0, x = k_padded_3_cast_fp16)[name = string("op_850_cast_fp16")]; + tensor var_852_begin_0 = const()[name = string("op_852_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_852_end_0 = const()[name = string("op_852_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_852_end_mask_0 = const()[name = string("op_852_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_852_cast_fp16 = slice_by_index(begin = var_852_begin_0, end = var_852_end_0, end_mask = var_852_end_mask_0, x = k_padded_3_cast_fp16)[name = string("op_852_cast_fp16")]; + int32 k_blocks_3_axis_0 = const()[name = string("k_blocks_3_axis_0"), val = int32(1)]; + tensor k_blocks_3_cast_fp16 = stack(axis = k_blocks_3_axis_0, values = (var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16, var_850_cast_fp16, var_852_cast_fp16))[name = string("k_blocks_3_cast_fp16")]; + tensor var_856_begin_0 = const()[name = string("op_856_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_856_end_0 = const()[name = string("op_856_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_856_end_mask_0 = const()[name = string("op_856_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_856_cast_fp16 = slice_by_index(begin = var_856_begin_0, end = var_856_end_0, end_mask = var_856_end_mask_0, x = v_padded_3_cast_fp16)[name = string("op_856_cast_fp16")]; + tensor var_858_begin_0 = const()[name = string("op_858_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_858_end_0 = const()[name = 
string("op_858_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_858_end_mask_0 = const()[name = string("op_858_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_858_cast_fp16 = slice_by_index(begin = var_858_begin_0, end = var_858_end_0, end_mask = var_858_end_mask_0, x = v_padded_3_cast_fp16)[name = string("op_858_cast_fp16")]; + tensor var_860_begin_0 = const()[name = string("op_860_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_860_end_0 = const()[name = string("op_860_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_860_end_mask_0 = const()[name = string("op_860_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_860_cast_fp16 = slice_by_index(begin = var_860_begin_0, end = var_860_end_0, end_mask = var_860_end_mask_0, x = v_padded_3_cast_fp16)[name = string("op_860_cast_fp16")]; + tensor var_862_begin_0 = const()[name = string("op_862_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_862_end_0 = const()[name = string("op_862_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_862_end_mask_0 = const()[name = string("op_862_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_862_cast_fp16 = slice_by_index(begin = var_862_begin_0, end = var_862_end_0, end_mask = var_862_end_mask_0, x = v_padded_3_cast_fp16)[name = string("op_862_cast_fp16")]; + tensor var_864_begin_0 = const()[name = string("op_864_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_864_end_0 = const()[name = string("op_864_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_864_end_mask_0 = const()[name = string("op_864_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_864_cast_fp16 = slice_by_index(begin = var_864_begin_0, end = var_864_end_0, end_mask = var_864_end_mask_0, x = v_padded_3_cast_fp16)[name = string("op_864_cast_fp16")]; + int32 v_blocks_3_axis_0 = const()[name = string("v_blocks_3_axis_0"), val = int32(1)]; + tensor v_blocks_3_cast_fp16 = stack(axis = v_blocks_3_axis_0, values = 
(var_856_cast_fp16, var_858_cast_fp16, var_860_cast_fp16, var_862_cast_fp16, var_864_cast_fp16))[name = string("v_blocks_3_cast_fp16")]; + tensor var_872 = const()[name = string("op_872"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_874 = const()[name = string("op_874"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_3_transpose_x_0 = const()[name = string("matrix_ac_3_transpose_x_0"), val = bool(false)]; + bool matrix_ac_3_transpose_y_0 = const()[name = string("matrix_ac_3_transpose_y_0"), val = bool(false)]; + tensor queries_3_cast_fp16 = transpose(perm = var_872, x = q_blocks_3_cast_fp16)[name = string("transpose_64")]; + tensor keys_t_3_cast_fp16 = transpose(perm = var_874, x = k_blocks_3_cast_fp16)[name = string("transpose_65")]; + tensor matrix_ac_3_cast_fp16 = matmul(transpose_x = matrix_ac_3_transpose_x_0, transpose_y = matrix_ac_3_transpose_y_0, x = queries_3_cast_fp16, y = keys_t_3_cast_fp16)[name = string("matrix_ac_3_cast_fp16")]; + tensor var_877 = const()[name = string("op_877"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_3_cast_fp16 = reshape(shape = var_877, x = queries_3_cast_fp16)[name = string("q_flat_3_cast_fp16")]; + bool matrix_bd_11_transpose_x_0 = const()[name = string("matrix_bd_11_transpose_x_0"), val = bool(false)]; + bool matrix_bd_11_transpose_y_0 = const()[name = string("matrix_bd_11_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_3_to_fp16 = const()[name = string("rel_k_t_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18471296)))]; + tensor matrix_bd_11_cast_fp16 = matmul(transpose_x = matrix_bd_11_transpose_x_0, transpose_y = matrix_bd_11_transpose_y_0, x = q_flat_3_cast_fp16, y = rel_k_t_3_to_fp16)[name = string("matrix_bd_11_cast_fp16")]; + tensor var_882 = const()[name = string("op_882"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_77_cast_fp16 = reshape(shape = var_882, x = matrix_bd_11_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor 
matrix_bd_13_pad_0 = const()[name = string("matrix_bd_13_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18497984)))]; + string matrix_bd_13_mode_0 = const()[name = string("matrix_bd_13_mode_0"), val = string("constant")]; + fp16 const_24_to_fp16 = const()[name = string("const_24_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_13_cast_fp16 = pad(constant_val = const_24_to_fp16, mode = matrix_bd_13_mode_0, pad = matrix_bd_13_pad_0, x = input_77_cast_fp16)[name = string("matrix_bd_13_cast_fp16")]; + tensor var_886 = const()[name = string("op_886"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_15_cast_fp16 = reshape(shape = var_886, x = matrix_bd_13_cast_fp16)[name = string("matrix_bd_15_cast_fp16")]; + tensor matrix_bd_17_begin_0 = const()[name = string("matrix_bd_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_17_end_0 = const()[name = string("matrix_bd_17_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_17_end_mask_0 = const()[name = string("matrix_bd_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_17_cast_fp16 = slice_by_index(begin = matrix_bd_17_begin_0, end = matrix_bd_17_end_0, end_mask = matrix_bd_17_end_mask_0, x = matrix_bd_15_cast_fp16)[name = string("matrix_bd_17_cast_fp16")]; + tensor var_892 = const()[name = string("op_892"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_19_cast_fp16 = reshape(shape = var_892, x = matrix_bd_17_cast_fp16)[name = string("matrix_bd_19_cast_fp16")]; + tensor attn_7_cast_fp16 = add(x = matrix_ac_3_cast_fp16, y = matrix_bd_19_cast_fp16)[name = string("attn_7_cast_fp16")]; + fp16 _inversed_895_y_0_to_fp16 = const()[name = string("_inversed_895_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_895_cast_fp16 = mul(x = attn_7_cast_fp16, y = _inversed_895_y_0_to_fp16)[name = string("_inversed_895_cast_fp16")]; + string _inversed_895_cast_fp16_to_fp32_dtype_0 = const()[name = 
string("_inversed_895_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_895_cast_fp16_to_fp32 = cast(dtype = _inversed_895_cast_fp16_to_fp32_dtype_0, x = _inversed_895_cast_fp16)[name = string("cast_480")]; + tensor var_896 = tanh(x = _inversed_895_cast_fp16_to_fp32)[name = string("op_896")]; + string var_896_to_fp16_dtype_0 = const()[name = string("op_896_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_1_softcap_to_fp16 = const()[name = string("self_attns_1_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_896_to_fp16 = cast(dtype = var_896_to_fp16_dtype_0, x = var_896)[name = string("cast_479")]; + tensor attn_9_cast_fp16 = mul(x = var_896_to_fp16, y = self_attns_1_softcap_to_fp16)[name = string("attn_9_cast_fp16")]; + string attn_9_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_9_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_9_cast_fp16_to_fp32 = cast(dtype = attn_9_cast_fp16_to_fp32_dtype_0, x = attn_9_cast_fp16)[name = string("cast_478")]; + tensor input_79 = select(a = var_759, b = attn_9_cast_fp16_to_fp32, cond = var_460)[name = string("input_79")]; + tensor var_900 = softmax(axis = var_758, x = input_79)[name = string("op_900")]; + tensor var_902 = const()[name = string("op_902"), val = tensor([0, 3, 1, -3, -1])]; + bool out_7_transpose_x_0 = const()[name = string("out_7_transpose_x_0"), val = bool(false)]; + bool out_7_transpose_y_0 = const()[name = string("out_7_transpose_y_0"), val = bool(false)]; + string var_900_to_fp16_dtype_0 = const()[name = string("op_900_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_3_cast_fp16 = transpose(perm = var_902, x = v_blocks_3_cast_fp16)[name = string("transpose_63")]; + tensor var_900_to_fp16 = cast(dtype = var_900_to_fp16_dtype_0, x = var_900)[name = string("cast_477")]; + tensor out_7_cast_fp16 = matmul(transpose_x = out_7_transpose_x_0, transpose_y = out_7_transpose_y_0, x = var_900_to_fp16, y = values_t_3_cast_fp16)[name = 
string("out_7_cast_fp16")]; + tensor var_905 = const()[name = string("op_905"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_907 = const()[name = string("op_907"), val = tensor([1, 60, 1024])]; + tensor var_906_cast_fp16 = transpose(perm = var_905, x = out_7_cast_fp16)[name = string("transpose_62")]; + tensor out_9_cast_fp16 = reshape(shape = var_907, x = var_906_cast_fp16)[name = string("out_9_cast_fp16")]; + tensor var_910_begin_0 = const()[name = string("op_910_begin_0"), val = tensor([0, 0, 0])]; + tensor var_910_end_0 = const()[name = string("op_910_end_0"), val = tensor([1, 50, 1024])]; + tensor var_910_end_mask_0 = const()[name = string("op_910_end_mask_0"), val = tensor([true, false, true])]; + tensor var_910_cast_fp16 = slice_by_index(begin = var_910_begin_0, end = var_910_end_0, end_mask = var_910_end_mask_0, x = out_9_cast_fp16)[name = string("op_910_cast_fp16")]; + fp16 self_attns_1_post_input_min_to_fp16 = const()[name = string("self_attns_1_post_input_min_to_fp16"), val = fp16(-0x1.1ep+4)]; + fp16 self_attns_1_post_input_max_to_fp16 = const()[name = string("self_attns_1_post_input_max_to_fp16"), val = fp16(0x1.1cp+4)]; + tensor clip_41_cast_fp16 = clip(alpha = self_attns_1_post_input_min_to_fp16, beta = self_attns_1_post_input_max_to_fp16, x = var_910_cast_fp16)[name = string("clip_41_cast_fp16")]; + tensor self_attns_1_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18498112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19022464))))[name = string("self_attns_1_post_linear_weight_to_fp16_palettized")]; + tensor linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_1_post_linear_weight_to_fp16_palettized, x = clip_41_cast_fp16)[name = string("linear_18_cast_fp16")]; + fp16 self_attns_1_post_output_min_to_fp16 = const()[name = string("self_attns_1_post_output_min_to_fp16"), val 
= fp16(-0x1.dp+5)]; + fp16 self_attns_1_post_output_max_to_fp16 = const()[name = string("self_attns_1_post_output_max_to_fp16"), val = fp16(0x1.ccp+5)]; + tensor clip_42_cast_fp16 = clip(alpha = self_attns_1_post_output_min_to_fp16, beta = self_attns_1_post_output_max_to_fp16, x = linear_18_cast_fp16)[name = string("clip_42_cast_fp16")]; + fp16 var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_43_cast_fp16 = clip(alpha = var_922_to_fp16, beta = var_923_to_fp16, x = clip_42_cast_fp16)[name = string("clip_43_cast_fp16")]; + string clip_43_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_43_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_925 = const()[name = string("op_925"), val = fp32(-0x1p-1)]; + fp32 var_929_promoted = const()[name = string("op_929_promoted"), val = fp32(0x1p+1)]; + tensor clip_43_cast_fp16_to_fp32 = cast(dtype = clip_43_cast_fp16_to_fp32_dtype_0, x = clip_43_cast_fp16)[name = string("cast_476")]; + tensor var_935 = pow(x = clip_43_cast_fp16_to_fp32, y = var_929_promoted)[name = string("op_935")]; + tensor var_937_axes_0 = const()[name = string("op_937_axes_0"), val = tensor([-1])]; + bool var_937_keep_dims_0 = const()[name = string("op_937_keep_dims_0"), val = bool(true)]; + tensor var_937 = reduce_mean(axes = var_937_axes_0, keep_dims = var_937_keep_dims_0, x = var_935)[name = string("op_937")]; + string var_937_to_fp16_dtype_0 = const()[name = string("op_937_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_937_to_fp16 = cast(dtype = var_937_to_fp16_dtype_0, x = var_937)[name = string("cast_475")]; + tensor mean_squared_25_cast_fp16 = add(x = var_937_to_fp16, y = var_938_to_fp16)[name = string("mean_squared_25_cast_fp16")]; + string mean_squared_25_cast_fp16_to_fp32_dtype_0 = const()[name = 
string("mean_squared_25_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_25_cast_fp16_to_fp32 = cast(dtype = mean_squared_25_cast_fp16_to_fp32_dtype_0, x = mean_squared_25_cast_fp16)[name = string("cast_474")]; + tensor var_940 = pow(x = mean_squared_25_cast_fp16_to_fp32, y = var_925)[name = string("op_940")]; + string var_940_to_fp16_dtype_0 = const()[name = string("op_940_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_940_to_fp16 = cast(dtype = var_940_to_fp16_dtype_0, x = var_940)[name = string("cast_473")]; + tensor normed_output_49_cast_fp16 = mul(x = clip_43_cast_fp16, y = var_940_to_fp16)[name = string("normed_output_49_cast_fp16")]; + tensor const_25_to_fp16 = const()[name = string("const_25_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19023552)))]; + tensor normed_output_51_cast_fp16 = mul(x = normed_output_49_cast_fp16, y = const_25_to_fp16)[name = string("normed_output_51_cast_fp16")]; + tensor hidden_states_163_cast_fp16 = add(x = normed_output_51_cast_fp16, y = hidden_states_137_cast_fp16)[name = string("hidden_states_163_cast_fp16")]; + string hidden_states_163_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_163_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_947 = const()[name = string("op_947"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_948 = const()[name = string("op_948"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_960 = const()[name = string("op_960"), val = fp32(-0x1p-1)]; + fp32 var_956_promoted = const()[name = string("op_956_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_163_cast_fp16_to_fp32 = cast(dtype = hidden_states_163_cast_fp16_to_fp32_dtype_0, x = hidden_states_163_cast_fp16)[name = string("cast_472")]; + tensor var_968 = pow(x = hidden_states_163_cast_fp16_to_fp32, y = var_956_promoted)[name = string("op_968")]; + tensor var_970_axes_0 = const()[name = string("op_970_axes_0"), val = tensor([-1])]; + bool 
var_970_keep_dims_0 = const()[name = string("op_970_keep_dims_0"), val = bool(true)]; + tensor var_970 = reduce_mean(axes = var_970_axes_0, keep_dims = var_970_keep_dims_0, x = var_968)[name = string("op_970")]; + string var_970_to_fp16_dtype_0 = const()[name = string("op_970_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_971_to_fp16 = const()[name = string("op_971_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_970_to_fp16 = cast(dtype = var_970_to_fp16_dtype_0, x = var_970)[name = string("cast_471")]; + tensor mean_squared_27_cast_fp16 = add(x = var_970_to_fp16, y = var_971_to_fp16)[name = string("mean_squared_27_cast_fp16")]; + string mean_squared_27_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_27_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_27_cast_fp16_to_fp32 = cast(dtype = mean_squared_27_cast_fp16_to_fp32_dtype_0, x = mean_squared_27_cast_fp16)[name = string("cast_470")]; + tensor var_973 = pow(x = mean_squared_27_cast_fp16_to_fp32, y = var_960)[name = string("op_973")]; + string var_973_to_fp16_dtype_0 = const()[name = string("op_973_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_973_to_fp16 = cast(dtype = var_973_to_fp16_dtype_0, x = var_973)[name = string("cast_469")]; + tensor normed_output_53_cast_fp16 = mul(x = hidden_states_163_cast_fp16, y = var_973_to_fp16)[name = string("normed_output_53_cast_fp16")]; + tensor const_26_to_fp16 = const()[name = string("const_26_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19025664)))]; + tensor normed_output_55_cast_fp16 = mul(x = normed_output_53_cast_fp16, y = const_26_to_fp16)[name = string("normed_output_55_cast_fp16")]; + fp16 lconv1ds_1_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_1_linear_start_input_min_to_fp16"), val = fp16(-0x1.46p+4)]; + fp16 lconv1ds_1_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_1_linear_start_input_max_to_fp16"), val = 
fp16(0x1.42p+4)]; + tensor clip_44_cast_fp16 = clip(alpha = lconv1ds_1_linear_start_input_min_to_fp16, beta = lconv1ds_1_linear_start_input_max_to_fp16, x = normed_output_55_cast_fp16)[name = string("clip_44_cast_fp16")]; + tensor lconv1ds_1_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19027776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20076416))))[name = string("lconv1ds_1_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_19_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_1_linear_start_linear_weight_to_fp16_palettized, x = clip_44_cast_fp16)[name = string("linear_19_cast_fp16")]; + fp16 lconv1ds_1_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_1_linear_start_output_min_to_fp16"), val = fp16(-0x1.aep+4)]; + fp16 lconv1ds_1_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_1_linear_start_output_max_to_fp16"), val = fp16(0x1.aap+4)]; + tensor clip_45_cast_fp16 = clip(alpha = lconv1ds_1_linear_start_output_min_to_fp16, beta = lconv1ds_1_linear_start_output_max_to_fp16, x = linear_19_cast_fp16)[name = string("clip_45_cast_fp16")]; + int32 hidden_states_171_split_num_splits_0 = const()[name = string("hidden_states_171_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_171_split_axis_0 = const()[name = string("hidden_states_171_split_axis_0"), val = int32(-1)]; + tensor hidden_states_171_split_cast_fp16_0, tensor hidden_states_171_split_cast_fp16_1 = split(axis = hidden_states_171_split_axis_0, num_splits = hidden_states_171_split_num_splits_0, x = clip_45_cast_fp16)[name = string("hidden_states_171_split_cast_fp16")]; + tensor hidden_states_171_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_171_split_cast_fp16_1)[name = string("hidden_states_171_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_171_cast_fp16 = mul(x 
= hidden_states_171_split_cast_fp16_0, y = hidden_states_171_split_1_sigmoid_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; + tensor input_87_perm_0 = const()[name = string("input_87_perm_0"), val = tensor([0, 2, 1])]; + tensor input_89_pad_0 = const()[name = string("input_89_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("constant")]; + fp16 const_27_to_fp16 = const()[name = string("const_27_to_fp16"), val = fp16(0x0p+0)]; + tensor input_87_cast_fp16 = transpose(perm = input_87_perm_0, x = hidden_states_171_cast_fp16)[name = string("transpose_61")]; + tensor input_89_cast_fp16 = pad(constant_val = const_27_to_fp16, mode = input_89_mode_0, pad = input_89_pad_0, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")]; + string var_999_pad_type_0 = const()[name = string("op_999_pad_type_0"), val = string("valid")]; + int32 var_999_groups_0 = const()[name = string("op_999_groups_0"), val = int32(1024)]; + tensor var_999_strides_0 = const()[name = string("op_999_strides_0"), val = tensor([1])]; + tensor var_999_pad_0 = const()[name = string("op_999_pad_0"), val = tensor([0, 0])]; + tensor var_999_dilations_0 = const()[name = string("op_999_dilations_0"), val = tensor([1])]; + tensor lconv1ds_1_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20078528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20081152))))[name = string("lconv1ds_1_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_999_cast_fp16 = conv(dilations = var_999_dilations_0, groups = var_999_groups_0, pad = var_999_pad_0, pad_type = var_999_pad_type_0, strides = var_999_strides_0, weight = lconv1ds_1_depthwise_conv1d_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_999_cast_fp16")]; + tensor hidden_states_173_perm_0 = const()[name = 
string("hidden_states_173_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_173_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_173_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_173_cast_fp16 = transpose(perm = hidden_states_173_perm_0, x = var_999_cast_fp16)[name = string("transpose_60")]; + tensor hidden_states_173_cast_fp16_to_fp32 = cast(dtype = hidden_states_173_cast_fp16_to_fp32_dtype_0, x = hidden_states_173_cast_fp16)[name = string("cast_468")]; + tensor clip_46 = clip(alpha = var_948, beta = var_947, x = hidden_states_173_cast_fp16_to_fp32)[name = string("clip_46")]; + fp32 var_956_promoted_1 = const()[name = string("op_956_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1004 = pow(x = clip_46, y = var_956_promoted_1)[name = string("op_1004")]; + tensor var_1006_axes_0 = const()[name = string("op_1006_axes_0"), val = tensor([-1])]; + bool var_1006_keep_dims_0 = const()[name = string("op_1006_keep_dims_0"), val = bool(true)]; + tensor var_1006 = reduce_mean(axes = var_1006_axes_0, keep_dims = var_1006_keep_dims_0, x = var_1004)[name = string("op_1006")]; + string var_1006_to_fp16_dtype_0 = const()[name = string("op_1006_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1006_to_fp16 = cast(dtype = var_1006_to_fp16_dtype_0, x = var_1006)[name = string("cast_467")]; + tensor mean_squared_29_cast_fp16 = add(x = var_1006_to_fp16, y = var_1007_to_fp16)[name = string("mean_squared_29_cast_fp16")]; + string mean_squared_29_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_29_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_29_cast_fp16_to_fp32 = cast(dtype = mean_squared_29_cast_fp16_to_fp32_dtype_0, x = mean_squared_29_cast_fp16)[name = string("cast_466")]; + tensor var_1009 = pow(x = mean_squared_29_cast_fp16_to_fp32, y = var_960)[name = string("op_1009")]; + string 
clip_46_to_fp16_dtype_0 = const()[name = string("clip_46_to_fp16_dtype_0"), val = string("fp16")]; + string var_1009_to_fp16_dtype_0 = const()[name = string("op_1009_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_46_to_fp16 = cast(dtype = clip_46_to_fp16_dtype_0, x = clip_46)[name = string("cast_464")]; + tensor var_1009_to_fp16 = cast(dtype = var_1009_to_fp16_dtype_0, x = var_1009)[name = string("cast_465")]; + tensor normed_output_57_cast_fp16 = mul(x = clip_46_to_fp16, y = var_1009_to_fp16)[name = string("normed_output_57_cast_fp16")]; + tensor const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20082240)))]; + tensor normed_output_59_cast_fp16 = mul(x = normed_output_57_cast_fp16, y = const_28_to_fp16)[name = string("normed_output_59_cast_fp16")]; + tensor hidden_states_179_cast_fp16 = silu(x = normed_output_59_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + fp16 lconv1ds_1_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_1_linear_end_input_min_to_fp16"), val = fp16(-0x1.fp+2)]; + fp16 lconv1ds_1_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_1_linear_end_input_max_to_fp16"), val = fp16(0x1.ecp+2)]; + tensor clip_47_cast_fp16 = clip(alpha = lconv1ds_1_linear_end_input_min_to_fp16, beta = lconv1ds_1_linear_end_input_max_to_fp16, x = hidden_states_179_cast_fp16)[name = string("clip_47_cast_fp16")]; + tensor lconv1ds_1_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20084352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20608704))))[name = string("lconv1ds_1_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_1_linear_end_linear_weight_to_fp16_palettized, x = clip_47_cast_fp16)[name 
= string("linear_20_cast_fp16")]; + fp16 lconv1ds_1_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_1_linear_end_output_min_to_fp16"), val = fp16(-0x1.04p+3)]; + fp16 lconv1ds_1_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_1_linear_end_output_max_to_fp16"), val = fp16(0x1.02p+3)]; + tensor clip_48_cast_fp16 = clip(alpha = lconv1ds_1_linear_end_output_min_to_fp16, beta = lconv1ds_1_linear_end_output_max_to_fp16, x = linear_20_cast_fp16)[name = string("clip_48_cast_fp16")]; + tensor hidden_states_185_cast_fp16 = add(x = clip_48_cast_fp16, y = hidden_states_163_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; + string hidden_states_185_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_185_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1033 = const()[name = string("op_1033"), val = fp32(-0x1p-1)]; + fp32 var_1034 = const()[name = string("op_1034"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1035 = const()[name = string("op_1035"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_185_cast_fp16_to_fp32 = cast(dtype = hidden_states_185_cast_fp16_to_fp32_dtype_0, x = hidden_states_185_cast_fp16)[name = string("cast_463")]; + tensor clip_49 = clip(alpha = var_1035, beta = var_1034, x = hidden_states_185_cast_fp16_to_fp32)[name = string("clip_49")]; + fp32 var_1029_promoted = const()[name = string("op_1029_promoted"), val = fp32(0x1p+1)]; + tensor var_1043 = pow(x = clip_49, y = var_1029_promoted)[name = string("op_1043")]; + tensor var_1045_axes_0 = const()[name = string("op_1045_axes_0"), val = tensor([-1])]; + bool var_1045_keep_dims_0 = const()[name = string("op_1045_keep_dims_0"), val = bool(true)]; + tensor var_1045 = reduce_mean(axes = var_1045_axes_0, keep_dims = var_1045_keep_dims_0, x = var_1043)[name = string("op_1045")]; + string var_1045_to_fp16_dtype_0 = const()[name = string("op_1045_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1046_to_fp16 = const()[name = 
string("op_1046_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1045_to_fp16 = cast(dtype = var_1045_to_fp16_dtype_0, x = var_1045)[name = string("cast_462")]; + tensor mean_squared_31_cast_fp16 = add(x = var_1045_to_fp16, y = var_1046_to_fp16)[name = string("mean_squared_31_cast_fp16")]; + string mean_squared_31_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_31_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_31_cast_fp16_to_fp32 = cast(dtype = mean_squared_31_cast_fp16_to_fp32_dtype_0, x = mean_squared_31_cast_fp16)[name = string("cast_461")]; + tensor var_1048 = pow(x = mean_squared_31_cast_fp16_to_fp32, y = var_1033)[name = string("op_1048")]; + string clip_49_to_fp16_dtype_0 = const()[name = string("clip_49_to_fp16_dtype_0"), val = string("fp16")]; + string var_1048_to_fp16_dtype_0 = const()[name = string("op_1048_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_49_to_fp16 = cast(dtype = clip_49_to_fp16_dtype_0, x = clip_49)[name = string("cast_459")]; + tensor var_1048_to_fp16 = cast(dtype = var_1048_to_fp16_dtype_0, x = var_1048)[name = string("cast_460")]; + tensor normed_output_61_cast_fp16 = mul(x = clip_49_to_fp16, y = var_1048_to_fp16)[name = string("normed_output_61_cast_fp16")]; + tensor const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20609792)))]; + tensor normed_output_63_cast_fp16 = mul(x = normed_output_61_cast_fp16, y = const_29_to_fp16)[name = string("normed_output_63_cast_fp16")]; + fp16 feed_forward2s_1_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.74p+3)]; + fp16 feed_forward2s_1_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.72p+3)]; + tensor clip_50_cast_fp16 = clip(alpha = feed_forward2s_1_ffw_layer_1_input_min_to_fp16, beta = 
feed_forward2s_1_ffw_layer_1_input_max_to_fp16, x = normed_output_63_cast_fp16)[name = string("clip_50_cast_fp16")]; + tensor feed_forward2s_1_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20611904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22709120))))[name = string("feed_forward2s_1_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_1_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_50_cast_fp16)[name = string("linear_21_cast_fp16")]; + fp16 feed_forward2s_1_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.bap+4)]; + fp16 feed_forward2s_1_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.b8p+4)]; + tensor clip_51_cast_fp16 = clip(alpha = feed_forward2s_1_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_1_ffw_layer_1_output_max_to_fp16, x = linear_21_cast_fp16)[name = string("clip_51_cast_fp16")]; + tensor hidden_states_195_cast_fp16 = silu(x = clip_51_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; + fp16 feed_forward2s_1_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.36p+3)]; + fp16 feed_forward2s_1_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.32p+3)]; + tensor clip_52_cast_fp16 = clip(alpha = feed_forward2s_1_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_1_ffw_layer_2_input_max_to_fp16, x = hidden_states_195_cast_fp16)[name = string("clip_52_cast_fp16")]; + tensor feed_forward2s_1_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(22713280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24810496))))[name = string("feed_forward2s_1_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_1_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_52_cast_fp16)[name = string("linear_22_cast_fp16")]; + fp16 feed_forward2s_1_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.32p+5)]; + fp16 feed_forward2s_1_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_1_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.2ep+5)]; + tensor clip_53_cast_fp16 = clip(alpha = feed_forward2s_1_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_1_ffw_layer_2_output_max_to_fp16, x = linear_22_cast_fp16)[name = string("clip_53_cast_fp16")]; + string clip_53_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_53_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_53_cast_fp16_to_fp32 = cast(dtype = clip_53_cast_fp16_to_fp32_dtype_0, x = clip_53_cast_fp16)[name = string("cast_458")]; + tensor clip_54 = clip(alpha = var_1035, beta = var_1034, x = clip_53_cast_fp16_to_fp32)[name = string("clip_54")]; + fp32 var_1029_promoted_1 = const()[name = string("op_1029_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1075 = pow(x = clip_54, y = var_1029_promoted_1)[name = string("op_1075")]; + tensor var_1077_axes_0 = const()[name = string("op_1077_axes_0"), val = tensor([-1])]; + bool var_1077_keep_dims_0 = const()[name = string("op_1077_keep_dims_0"), val = bool(true)]; + tensor var_1077 = reduce_mean(axes = var_1077_axes_0, keep_dims = var_1077_keep_dims_0, x = var_1075)[name = string("op_1077")]; + string var_1077_to_fp16_dtype_0 = const()[name = string("op_1077_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1078_to_fp16 = 
const()[name = string("op_1078_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1077_to_fp16 = cast(dtype = var_1077_to_fp16_dtype_0, x = var_1077)[name = string("cast_457")]; + tensor mean_squared_33_cast_fp16 = add(x = var_1077_to_fp16, y = var_1078_to_fp16)[name = string("mean_squared_33_cast_fp16")]; + string mean_squared_33_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_33_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_33_cast_fp16_to_fp32 = cast(dtype = mean_squared_33_cast_fp16_to_fp32_dtype_0, x = mean_squared_33_cast_fp16)[name = string("cast_456")]; + tensor var_1080 = pow(x = mean_squared_33_cast_fp16_to_fp32, y = var_1033)[name = string("op_1080")]; + string clip_54_to_fp16_dtype_0 = const()[name = string("clip_54_to_fp16_dtype_0"), val = string("fp16")]; + string var_1080_to_fp16_dtype_0 = const()[name = string("op_1080_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_54_to_fp16 = cast(dtype = clip_54_to_fp16_dtype_0, x = clip_54)[name = string("cast_454")]; + tensor var_1080_to_fp16 = cast(dtype = var_1080_to_fp16_dtype_0, x = var_1080)[name = string("cast_455")]; + tensor normed_output_65_cast_fp16 = mul(x = clip_54_to_fp16, y = var_1080_to_fp16)[name = string("normed_output_65_cast_fp16")]; + tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24811584)))]; + tensor normed_output_67_cast_fp16 = mul(x = normed_output_65_cast_fp16, y = const_30_to_fp16)[name = string("normed_output_67_cast_fp16")]; + fp16 var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_207_cast_fp16 = mul(x = normed_output_67_cast_fp16, y = var_1025_to_fp16)[name = string("hidden_states_207_cast_fp16")]; + tensor hidden_states_209_cast_fp16 = add(x = hidden_states_207_cast_fp16, y = hidden_states_185_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + fp16 
var_1087_to_fp16 = const()[name = string("op_1087_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1088_to_fp16 = const()[name = string("op_1088_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_55_cast_fp16 = clip(alpha = var_1087_to_fp16, beta = var_1088_to_fp16, x = hidden_states_209_cast_fp16)[name = string("clip_55_cast_fp16")]; + string clip_55_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_55_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1090 = const()[name = string("op_1090"), val = fp32(-0x1p-1)]; + fp32 var_1094_promoted = const()[name = string("op_1094_promoted"), val = fp32(0x1p+1)]; + tensor clip_55_cast_fp16_to_fp32 = cast(dtype = clip_55_cast_fp16_to_fp32_dtype_0, x = clip_55_cast_fp16)[name = string("cast_453")]; + tensor var_1100 = pow(x = clip_55_cast_fp16_to_fp32, y = var_1094_promoted)[name = string("op_1100")]; + tensor var_1102_axes_0 = const()[name = string("op_1102_axes_0"), val = tensor([-1])]; + bool var_1102_keep_dims_0 = const()[name = string("op_1102_keep_dims_0"), val = bool(true)]; + tensor var_1102 = reduce_mean(axes = var_1102_axes_0, keep_dims = var_1102_keep_dims_0, x = var_1100)[name = string("op_1102")]; + string var_1102_to_fp16_dtype_0 = const()[name = string("op_1102_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1103_to_fp16 = const()[name = string("op_1103_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1102_to_fp16 = cast(dtype = var_1102_to_fp16_dtype_0, x = var_1102)[name = string("cast_452")]; + tensor mean_squared_35_cast_fp16 = add(x = var_1102_to_fp16, y = var_1103_to_fp16)[name = string("mean_squared_35_cast_fp16")]; + string mean_squared_35_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_35_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_35_cast_fp16_to_fp32 = cast(dtype = mean_squared_35_cast_fp16_to_fp32_dtype_0, x = mean_squared_35_cast_fp16)[name = string("cast_451")]; + tensor var_1105 = pow(x = mean_squared_35_cast_fp16_to_fp32, y = 
var_1090)[name = string("op_1105")]; + string var_1105_to_fp16_dtype_0 = const()[name = string("op_1105_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1105_to_fp16 = cast(dtype = var_1105_to_fp16_dtype_0, x = var_1105)[name = string("cast_450")]; + tensor normed_output_69_cast_fp16 = mul(x = clip_55_cast_fp16, y = var_1105_to_fp16)[name = string("normed_output_69_cast_fp16")]; + tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24813696)))]; + tensor normed_output_71_cast_fp16 = mul(x = normed_output_69_cast_fp16, y = const_31_to_fp16)[name = string("normed_output_71_cast_fp16")]; + string normed_output_71_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_71_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1118 = const()[name = string("op_1118"), val = fp32(-0x1p-1)]; + fp32 var_1119 = const()[name = string("op_1119"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1120 = const()[name = string("op_1120"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_71_cast_fp16_to_fp32 = cast(dtype = normed_output_71_cast_fp16_to_fp32_dtype_0, x = normed_output_71_cast_fp16)[name = string("cast_449")]; + tensor clip_56 = clip(alpha = var_1120, beta = var_1119, x = normed_output_71_cast_fp16_to_fp32)[name = string("clip_56")]; + fp32 var_1114_promoted = const()[name = string("op_1114_promoted"), val = fp32(0x1p+1)]; + tensor var_1128 = pow(x = clip_56, y = var_1114_promoted)[name = string("op_1128")]; + tensor var_1130_axes_0 = const()[name = string("op_1130_axes_0"), val = tensor([-1])]; + bool var_1130_keep_dims_0 = const()[name = string("op_1130_keep_dims_0"), val = bool(true)]; + tensor var_1130 = reduce_mean(axes = var_1130_axes_0, keep_dims = var_1130_keep_dims_0, x = var_1128)[name = string("op_1130")]; + string var_1130_to_fp16_dtype_0 = const()[name = string("op_1130_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1131_to_fp16 
= const()[name = string("op_1131_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1130_to_fp16 = cast(dtype = var_1130_to_fp16_dtype_0, x = var_1130)[name = string("cast_448")]; + tensor mean_squared_37_cast_fp16 = add(x = var_1130_to_fp16, y = var_1131_to_fp16)[name = string("mean_squared_37_cast_fp16")]; + string mean_squared_37_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_37_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_37_cast_fp16_to_fp32 = cast(dtype = mean_squared_37_cast_fp16_to_fp32_dtype_0, x = mean_squared_37_cast_fp16)[name = string("cast_447")]; + tensor var_1133 = pow(x = mean_squared_37_cast_fp16_to_fp32, y = var_1118)[name = string("op_1133")]; + string clip_56_to_fp16_dtype_0 = const()[name = string("clip_56_to_fp16_dtype_0"), val = string("fp16")]; + string var_1133_to_fp16_dtype_0 = const()[name = string("op_1133_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_56_to_fp16 = cast(dtype = clip_56_to_fp16_dtype_0, x = clip_56)[name = string("cast_445")]; + tensor var_1133_to_fp16 = cast(dtype = var_1133_to_fp16_dtype_0, x = var_1133)[name = string("cast_446")]; + tensor normed_output_73_cast_fp16 = mul(x = clip_56_to_fp16, y = var_1133_to_fp16)[name = string("normed_output_73_cast_fp16")]; + tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24815808)))]; + tensor normed_output_75_cast_fp16 = mul(x = normed_output_73_cast_fp16, y = const_32_to_fp16)[name = string("normed_output_75_cast_fp16")]; + fp16 feed_forward1s_2_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.ap+3)]; + fp16 feed_forward1s_2_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.9cp+3)]; + tensor clip_57_cast_fp16 = clip(alpha = feed_forward1s_2_ffw_layer_1_input_min_to_fp16, beta = 
feed_forward1s_2_ffw_layer_1_input_max_to_fp16, x = normed_output_75_cast_fp16)[name = string("clip_57_cast_fp16")]; + tensor feed_forward1s_2_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24817920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26915136))))[name = string("feed_forward1s_2_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_2_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_57_cast_fp16)[name = string("linear_23_cast_fp16")]; + fp16 feed_forward1s_2_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.c6p+4)]; + fp16 feed_forward1s_2_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.c2p+4)]; + tensor clip_58_cast_fp16 = clip(alpha = feed_forward1s_2_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_2_ffw_layer_1_output_max_to_fp16, x = linear_23_cast_fp16)[name = string("clip_58_cast_fp16")]; + tensor hidden_states_225_cast_fp16 = silu(x = clip_58_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; + fp16 feed_forward1s_2_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.28p+3)]; + fp16 feed_forward1s_2_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.26p+3)]; + tensor clip_59_cast_fp16 = clip(alpha = feed_forward1s_2_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_2_ffw_layer_2_input_max_to_fp16, x = hidden_states_225_cast_fp16)[name = string("clip_59_cast_fp16")]; + tensor feed_forward1s_2_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(26919296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29016512))))[name = string("feed_forward1s_2_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_2_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_59_cast_fp16)[name = string("linear_24_cast_fp16")]; + fp16 feed_forward1s_2_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.2p+5)]; + fp16 feed_forward1s_2_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_2_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.1ep+5)]; + tensor clip_60_cast_fp16 = clip(alpha = feed_forward1s_2_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_2_ffw_layer_2_output_max_to_fp16, x = linear_24_cast_fp16)[name = string("clip_60_cast_fp16")]; + string clip_60_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_60_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_60_cast_fp16_to_fp32 = cast(dtype = clip_60_cast_fp16_to_fp32_dtype_0, x = clip_60_cast_fp16)[name = string("cast_444")]; + tensor clip_61 = clip(alpha = var_1120, beta = var_1119, x = clip_60_cast_fp16_to_fp32)[name = string("clip_61")]; + fp32 var_1114_promoted_1 = const()[name = string("op_1114_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1160 = pow(x = clip_61, y = var_1114_promoted_1)[name = string("op_1160")]; + tensor var_1162_axes_0 = const()[name = string("op_1162_axes_0"), val = tensor([-1])]; + bool var_1162_keep_dims_0 = const()[name = string("op_1162_keep_dims_0"), val = bool(true)]; + tensor var_1162 = reduce_mean(axes = var_1162_axes_0, keep_dims = var_1162_keep_dims_0, x = var_1160)[name = string("op_1162")]; + string var_1162_to_fp16_dtype_0 = const()[name = string("op_1162_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1163_to_fp16 = 
const()[name = string("op_1163_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1162_to_fp16 = cast(dtype = var_1162_to_fp16_dtype_0, x = var_1162)[name = string("cast_443")]; + tensor mean_squared_39_cast_fp16 = add(x = var_1162_to_fp16, y = var_1163_to_fp16)[name = string("mean_squared_39_cast_fp16")]; + string mean_squared_39_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_39_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_39_cast_fp16_to_fp32 = cast(dtype = mean_squared_39_cast_fp16_to_fp32_dtype_0, x = mean_squared_39_cast_fp16)[name = string("cast_442")]; + tensor var_1165 = pow(x = mean_squared_39_cast_fp16_to_fp32, y = var_1118)[name = string("op_1165")]; + string clip_61_to_fp16_dtype_0 = const()[name = string("clip_61_to_fp16_dtype_0"), val = string("fp16")]; + string var_1165_to_fp16_dtype_0 = const()[name = string("op_1165_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_61_to_fp16 = cast(dtype = clip_61_to_fp16_dtype_0, x = clip_61)[name = string("cast_440")]; + tensor var_1165_to_fp16 = cast(dtype = var_1165_to_fp16_dtype_0, x = var_1165)[name = string("cast_441")]; + tensor normed_output_77_cast_fp16 = mul(x = clip_61_to_fp16, y = var_1165_to_fp16)[name = string("normed_output_77_cast_fp16")]; + tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29017600)))]; + tensor normed_output_79_cast_fp16 = mul(x = normed_output_77_cast_fp16, y = const_33_to_fp16)[name = string("normed_output_79_cast_fp16")]; + fp16 var_1110_to_fp16 = const()[name = string("op_1110_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_237_cast_fp16 = mul(x = normed_output_79_cast_fp16, y = var_1110_to_fp16)[name = string("hidden_states_237_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_237_cast_fp16, y = normed_output_71_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + fp16 
var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_62_cast_fp16 = clip(alpha = var_1172_to_fp16, beta = var_1173_to_fp16, x = hidden_states_239_cast_fp16)[name = string("clip_62_cast_fp16")]; + string clip_62_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_62_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1175 = const()[name = string("op_1175"), val = fp32(-0x1p-1)]; + fp32 var_1179_promoted = const()[name = string("op_1179_promoted"), val = fp32(0x1p+1)]; + tensor clip_62_cast_fp16_to_fp32 = cast(dtype = clip_62_cast_fp16_to_fp32_dtype_0, x = clip_62_cast_fp16)[name = string("cast_439")]; + tensor var_1185 = pow(x = clip_62_cast_fp16_to_fp32, y = var_1179_promoted)[name = string("op_1185")]; + tensor var_1187_axes_0 = const()[name = string("op_1187_axes_0"), val = tensor([-1])]; + bool var_1187_keep_dims_0 = const()[name = string("op_1187_keep_dims_0"), val = bool(true)]; + tensor var_1187 = reduce_mean(axes = var_1187_axes_0, keep_dims = var_1187_keep_dims_0, x = var_1185)[name = string("op_1187")]; + string var_1187_to_fp16_dtype_0 = const()[name = string("op_1187_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1187_to_fp16 = cast(dtype = var_1187_to_fp16_dtype_0, x = var_1187)[name = string("cast_438")]; + tensor mean_squared_41_cast_fp16 = add(x = var_1187_to_fp16, y = var_1188_to_fp16)[name = string("mean_squared_41_cast_fp16")]; + string mean_squared_41_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_41_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_41_cast_fp16_to_fp32 = cast(dtype = mean_squared_41_cast_fp16_to_fp32_dtype_0, x = mean_squared_41_cast_fp16)[name = string("cast_437")]; + tensor var_1190 = pow(x = mean_squared_41_cast_fp16_to_fp32, y = 
var_1175)[name = string("op_1190")]; + string var_1190_to_fp16_dtype_0 = const()[name = string("op_1190_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1190_to_fp16 = cast(dtype = var_1190_to_fp16_dtype_0, x = var_1190)[name = string("cast_436")]; + tensor normed_output_81_cast_fp16 = mul(x = clip_62_cast_fp16, y = var_1190_to_fp16)[name = string("normed_output_81_cast_fp16")]; + tensor const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29019712)))]; + tensor normed_output_83_cast_fp16 = mul(x = normed_output_81_cast_fp16, y = const_34_to_fp16)[name = string("normed_output_83_cast_fp16")]; + int32 var_1196 = const()[name = string("op_1196"), val = int32(-1)]; + fp32 var_1197 = const()[name = string("op_1197"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_2_q_proj_input_min_to_fp16 = const()[name = string("self_attns_2_q_proj_input_min_to_fp16"), val = fp16(-0x1.9p+3)]; + fp16 self_attns_2_q_proj_input_max_to_fp16 = const()[name = string("self_attns_2_q_proj_input_max_to_fp16"), val = fp16(0x1.8cp+3)]; + tensor clip_63_cast_fp16 = clip(alpha = self_attns_2_q_proj_input_min_to_fp16, beta = self_attns_2_q_proj_input_max_to_fp16, x = normed_output_83_cast_fp16)[name = string("clip_63_cast_fp16")]; + tensor self_attns_2_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29021824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29546176))))[name = string("self_attns_2_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_25_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_2_q_proj_linear_weight_to_fp16_palettized, x = clip_63_cast_fp16)[name = string("linear_25_cast_fp16")]; + fp16 self_attns_2_q_proj_output_min_to_fp16 = const()[name = string("self_attns_2_q_proj_output_min_to_fp16"), val = 
fp16(-0x1.4ep+4)]; + fp16 self_attns_2_q_proj_output_max_to_fp16 = const()[name = string("self_attns_2_q_proj_output_max_to_fp16"), val = fp16(0x1.4cp+4)]; + tensor clip_64_cast_fp16 = clip(alpha = self_attns_2_q_proj_output_min_to_fp16, beta = self_attns_2_q_proj_output_max_to_fp16, x = linear_25_cast_fp16)[name = string("clip_64_cast_fp16")]; + tensor var_1241 = const()[name = string("op_1241"), val = tensor([1, 50, 8, 128])]; + tensor q_5_cast_fp16 = reshape(shape = var_1241, x = clip_64_cast_fp16)[name = string("q_5_cast_fp16")]; + tensor self_attns_2_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29547264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30071616))))[name = string("self_attns_2_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_2_k_proj_linear_weight_to_fp16_palettized, x = clip_63_cast_fp16)[name = string("linear_26_cast_fp16")]; + fp16 self_attns_2_k_proj_output_min_to_fp16 = const()[name = string("self_attns_2_k_proj_output_min_to_fp16"), val = fp16(-0x1.4ep+4)]; + fp16 self_attns_2_k_proj_output_max_to_fp16 = const()[name = string("self_attns_2_k_proj_output_max_to_fp16"), val = fp16(0x1.4cp+4)]; + tensor clip_66_cast_fp16 = clip(alpha = self_attns_2_k_proj_output_min_to_fp16, beta = self_attns_2_k_proj_output_max_to_fp16, x = linear_26_cast_fp16)[name = string("clip_66_cast_fp16")]; + tensor var_1253 = const()[name = string("op_1253"), val = tensor([1, 50, 8, 128])]; + tensor k_5_cast_fp16 = reshape(shape = var_1253, x = clip_66_cast_fp16)[name = string("k_5_cast_fp16")]; + tensor self_attns_2_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30072704))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(30597056))))[name = string("self_attns_2_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_27_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_2_v_proj_linear_weight_to_fp16_palettized, x = clip_63_cast_fp16)[name = string("linear_27_cast_fp16")]; + fp16 self_attns_2_v_proj_output_min_to_fp16 = const()[name = string("self_attns_2_v_proj_output_min_to_fp16"), val = fp16(-0x1.4ep+4)]; + fp16 self_attns_2_v_proj_output_max_to_fp16 = const()[name = string("self_attns_2_v_proj_output_max_to_fp16"), val = fp16(0x1.4cp+4)]; + tensor clip_68_cast_fp16 = clip(alpha = self_attns_2_v_proj_output_min_to_fp16, beta = self_attns_2_v_proj_output_max_to_fp16, x = linear_27_cast_fp16)[name = string("clip_68_cast_fp16")]; + tensor var_1265 = const()[name = string("op_1265"), val = tensor([1, 50, 8, 128])]; + tensor input_117_cast_fp16 = reshape(shape = var_1265, x = clip_68_cast_fp16)[name = string("input_117_cast_fp16")]; + fp16 var_1267_to_fp16 = const()[name = string("op_1267_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_1268_cast_fp16 = mul(x = q_5_cast_fp16, y = var_1267_to_fp16)[name = string("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = string("op_1269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30598144)))]; + tensor input_113_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = string("input_113_cast_fp16")]; + fp16 var_1271_to_fp16 = const()[name = string("op_1271_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_115_cast_fp16 = mul(x = k_5_cast_fp16, y = var_1271_to_fp16)[name = string("input_115_cast_fp16")]; + tensor q_padded_5_pad_0 = const()[name = string("q_padded_5_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_5_mode_0 = const()[name = string("q_padded_5_mode_0"), val = string("constant")]; + fp16 const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = 
fp16(0x0p+0)]; + tensor q_padded_5_cast_fp16 = pad(constant_val = const_35_to_fp16, mode = q_padded_5_mode_0, pad = q_padded_5_pad_0, x = input_113_cast_fp16)[name = string("q_padded_5_cast_fp16")]; + tensor var_1275 = const()[name = string("op_1275"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_5_cast_fp16 = reshape(shape = var_1275, x = q_padded_5_cast_fp16)[name = string("q_blocks_5_cast_fp16")]; + tensor k_padded_5_pad_0 = const()[name = string("k_padded_5_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; + fp16 const_36_to_fp16 = const()[name = string("const_36_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_5_cast_fp16 = pad(constant_val = const_36_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_115_cast_fp16)[name = string("k_padded_5_cast_fp16")]; + tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; + fp16 const_37_to_fp16 = const()[name = string("const_37_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_5_cast_fp16 = pad(constant_val = const_37_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = input_117_cast_fp16)[name = string("v_padded_5_cast_fp16")]; + tensor var_1282_begin_0 = const()[name = string("op_1282_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1282_end_0 = const()[name = string("op_1282_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_1282_end_mask_0 = const()[name = string("op_1282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1282_cast_fp16 = slice_by_index(begin = var_1282_begin_0, end = var_1282_end_0, end_mask = var_1282_end_mask_0, x = k_padded_5_cast_fp16)[name = string("op_1282_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = string("op_1284_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor 
var_1284_end_0 = const()[name = string("op_1284_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_1284_end_mask_0 = const()[name = string("op_1284_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_padded_5_cast_fp16)[name = string("op_1284_cast_fp16")]; + tensor var_1286_begin_0 = const()[name = string("op_1286_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_1286_end_0 = const()[name = string("op_1286_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_1286_end_mask_0 = const()[name = string("op_1286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1286_cast_fp16 = slice_by_index(begin = var_1286_begin_0, end = var_1286_end_0, end_mask = var_1286_end_mask_0, x = k_padded_5_cast_fp16)[name = string("op_1286_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = string("op_1288_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_1288_end_0 = const()[name = string("op_1288_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_1288_end_mask_0 = const()[name = string("op_1288_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_padded_5_cast_fp16)[name = string("op_1288_cast_fp16")]; + tensor var_1290_begin_0 = const()[name = string("op_1290_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_1290_end_0 = const()[name = string("op_1290_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_1290_end_mask_0 = const()[name = string("op_1290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1290_cast_fp16 = slice_by_index(begin = var_1290_begin_0, end = var_1290_end_0, end_mask = var_1290_end_mask_0, x = k_padded_5_cast_fp16)[name = string("op_1290_cast_fp16")]; + int32 k_blocks_5_axis_0 = const()[name = string("k_blocks_5_axis_0"), val = int32(1)]; + 
tensor k_blocks_5_cast_fp16 = stack(axis = k_blocks_5_axis_0, values = (var_1282_cast_fp16, var_1284_cast_fp16, var_1286_cast_fp16, var_1288_cast_fp16, var_1290_cast_fp16))[name = string("k_blocks_5_cast_fp16")]; + tensor var_1294_begin_0 = const()[name = string("op_1294_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1294_end_0 = const()[name = string("op_1294_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_1294_end_mask_0 = const()[name = string("op_1294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1294_cast_fp16 = slice_by_index(begin = var_1294_begin_0, end = var_1294_end_0, end_mask = var_1294_end_mask_0, x = v_padded_5_cast_fp16)[name = string("op_1294_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = string("op_1296_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_1296_end_0 = const()[name = string("op_1296_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_1296_end_mask_0 = const()[name = string("op_1296_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = v_padded_5_cast_fp16)[name = string("op_1296_cast_fp16")]; + tensor var_1298_begin_0 = const()[name = string("op_1298_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_1298_end_0 = const()[name = string("op_1298_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_1298_end_mask_0 = const()[name = string("op_1298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1298_cast_fp16 = slice_by_index(begin = var_1298_begin_0, end = var_1298_end_0, end_mask = var_1298_end_mask_0, x = v_padded_5_cast_fp16)[name = string("op_1298_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = string("op_1300_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_1300_end_0 = const()[name = string("op_1300_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_1300_end_mask_0 = const()[name = string("op_1300_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = v_padded_5_cast_fp16)[name = string("op_1300_cast_fp16")]; + tensor var_1302_begin_0 = const()[name = string("op_1302_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_1302_end_0 = const()[name = string("op_1302_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_1302_end_mask_0 = const()[name = string("op_1302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1302_cast_fp16 = slice_by_index(begin = var_1302_begin_0, end = var_1302_end_0, end_mask = var_1302_end_mask_0, x = v_padded_5_cast_fp16)[name = string("op_1302_cast_fp16")]; + int32 v_blocks_5_axis_0 = const()[name = string("v_blocks_5_axis_0"), val = int32(1)]; + tensor v_blocks_5_cast_fp16 = stack(axis = v_blocks_5_axis_0, values = (var_1294_cast_fp16, var_1296_cast_fp16, var_1298_cast_fp16, var_1300_cast_fp16, var_1302_cast_fp16))[name = string("v_blocks_5_cast_fp16")]; + tensor var_1310 = const()[name = string("op_1310"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_1312 = const()[name = string("op_1312"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_5_transpose_x_0 = const()[name = string("matrix_ac_5_transpose_x_0"), val = bool(false)]; + bool matrix_ac_5_transpose_y_0 = const()[name = string("matrix_ac_5_transpose_y_0"), val = bool(false)]; + tensor queries_5_cast_fp16 = transpose(perm = var_1310, x = q_blocks_5_cast_fp16)[name = string("transpose_58")]; + tensor keys_t_5_cast_fp16 = transpose(perm = var_1312, x = k_blocks_5_cast_fp16)[name = string("transpose_59")]; + tensor matrix_ac_5_cast_fp16 = matmul(transpose_x = matrix_ac_5_transpose_x_0, transpose_y = matrix_ac_5_transpose_y_0, x = queries_5_cast_fp16, y = keys_t_5_cast_fp16)[name = string("matrix_ac_5_cast_fp16")]; + tensor var_1315 = const()[name = string("op_1315"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_5_cast_fp16 = 
reshape(shape = var_1315, x = queries_5_cast_fp16)[name = string("q_flat_5_cast_fp16")]; + bool matrix_bd_21_transpose_x_0 = const()[name = string("matrix_bd_21_transpose_x_0"), val = bool(false)]; + bool matrix_bd_21_transpose_y_0 = const()[name = string("matrix_bd_21_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_5_to_fp16 = const()[name = string("rel_k_t_5_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30598464)))]; + tensor matrix_bd_21_cast_fp16 = matmul(transpose_x = matrix_bd_21_transpose_x_0, transpose_y = matrix_bd_21_transpose_y_0, x = q_flat_5_cast_fp16, y = rel_k_t_5_to_fp16)[name = string("matrix_bd_21_cast_fp16")]; + tensor var_1320 = const()[name = string("op_1320"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_119_cast_fp16 = reshape(shape = var_1320, x = matrix_bd_21_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor matrix_bd_23_pad_0 = const()[name = string("matrix_bd_23_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30625152)))]; + string matrix_bd_23_mode_0 = const()[name = string("matrix_bd_23_mode_0"), val = string("constant")]; + fp16 const_39_to_fp16 = const()[name = string("const_39_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_23_cast_fp16 = pad(constant_val = const_39_to_fp16, mode = matrix_bd_23_mode_0, pad = matrix_bd_23_pad_0, x = input_119_cast_fp16)[name = string("matrix_bd_23_cast_fp16")]; + tensor var_1324 = const()[name = string("op_1324"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_25_cast_fp16 = reshape(shape = var_1324, x = matrix_bd_23_cast_fp16)[name = string("matrix_bd_25_cast_fp16")]; + tensor matrix_bd_27_begin_0 = const()[name = string("matrix_bd_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_27_end_0 = const()[name = string("matrix_bd_27_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_27_end_mask_0 = const()[name = string("matrix_bd_27_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor matrix_bd_27_cast_fp16 = slice_by_index(begin = matrix_bd_27_begin_0, end = matrix_bd_27_end_0, end_mask = matrix_bd_27_end_mask_0, x = matrix_bd_25_cast_fp16)[name = string("matrix_bd_27_cast_fp16")]; + tensor var_1330 = const()[name = string("op_1330"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_29_cast_fp16 = reshape(shape = var_1330, x = matrix_bd_27_cast_fp16)[name = string("matrix_bd_29_cast_fp16")]; + tensor attn_13_cast_fp16 = add(x = matrix_ac_5_cast_fp16, y = matrix_bd_29_cast_fp16)[name = string("attn_13_cast_fp16")]; + fp16 _inversed_1333_y_0_to_fp16 = const()[name = string("_inversed_1333_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_1333_cast_fp16 = mul(x = attn_13_cast_fp16, y = _inversed_1333_y_0_to_fp16)[name = string("_inversed_1333_cast_fp16")]; + string _inversed_1333_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_1333_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_1333_cast_fp16_to_fp32 = cast(dtype = _inversed_1333_cast_fp16_to_fp32_dtype_0, x = _inversed_1333_cast_fp16)[name = string("cast_435")]; + tensor var_1334 = tanh(x = _inversed_1333_cast_fp16_to_fp32)[name = string("op_1334")]; + string var_1334_to_fp16_dtype_0 = const()[name = string("op_1334_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_2_softcap_to_fp16 = const()[name = string("self_attns_2_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_1334_to_fp16 = cast(dtype = var_1334_to_fp16_dtype_0, x = var_1334)[name = string("cast_434")]; + tensor attn_15_cast_fp16 = mul(x = var_1334_to_fp16, y = self_attns_2_softcap_to_fp16)[name = string("attn_15_cast_fp16")]; + string attn_15_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_15_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_15_cast_fp16_to_fp32 = cast(dtype = attn_15_cast_fp16_to_fp32_dtype_0, x = attn_15_cast_fp16)[name = string("cast_433")]; + tensor input_121 = select(a = var_1197, b 
= attn_15_cast_fp16_to_fp32, cond = var_460)[name = string("input_121")]; + tensor var_1338 = softmax(axis = var_1196, x = input_121)[name = string("op_1338")]; + tensor var_1340 = const()[name = string("op_1340"), val = tensor([0, 3, 1, -3, -1])]; + bool out_13_transpose_x_0 = const()[name = string("out_13_transpose_x_0"), val = bool(false)]; + bool out_13_transpose_y_0 = const()[name = string("out_13_transpose_y_0"), val = bool(false)]; + string var_1338_to_fp16_dtype_0 = const()[name = string("op_1338_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_5_cast_fp16 = transpose(perm = var_1340, x = v_blocks_5_cast_fp16)[name = string("transpose_57")]; + tensor var_1338_to_fp16 = cast(dtype = var_1338_to_fp16_dtype_0, x = var_1338)[name = string("cast_432")]; + tensor out_13_cast_fp16 = matmul(transpose_x = out_13_transpose_x_0, transpose_y = out_13_transpose_y_0, x = var_1338_to_fp16, y = values_t_5_cast_fp16)[name = string("out_13_cast_fp16")]; + tensor var_1343 = const()[name = string("op_1343"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_1345 = const()[name = string("op_1345"), val = tensor([1, 60, 1024])]; + tensor var_1344_cast_fp16 = transpose(perm = var_1343, x = out_13_cast_fp16)[name = string("transpose_56")]; + tensor out_15_cast_fp16 = reshape(shape = var_1345, x = var_1344_cast_fp16)[name = string("out_15_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = string("op_1348_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1348_end_0 = const()[name = string("op_1348_end_0"), val = tensor([1, 50, 1024])]; + tensor var_1348_end_mask_0 = const()[name = string("op_1348_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = out_15_cast_fp16)[name = string("op_1348_cast_fp16")]; + fp16 self_attns_2_post_input_min_to_fp16 = const()[name = string("self_attns_2_post_input_min_to_fp16"), val = fp16(-0x1.22p+4)]; + fp16 
self_attns_2_post_input_max_to_fp16 = const()[name = string("self_attns_2_post_input_max_to_fp16"), val = fp16(0x1.2p+4)]; + tensor clip_69_cast_fp16 = clip(alpha = self_attns_2_post_input_min_to_fp16, beta = self_attns_2_post_input_max_to_fp16, x = var_1348_cast_fp16)[name = string("clip_69_cast_fp16")]; + tensor self_attns_2_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30625280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31149632))))[name = string("self_attns_2_post_linear_weight_to_fp16_palettized")]; + tensor linear_29_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_2_post_linear_weight_to_fp16_palettized, x = clip_69_cast_fp16)[name = string("linear_29_cast_fp16")]; + fp16 self_attns_2_post_output_min_to_fp16 = const()[name = string("self_attns_2_post_output_min_to_fp16"), val = fp16(-0x1.eep+5)]; + fp16 self_attns_2_post_output_max_to_fp16 = const()[name = string("self_attns_2_post_output_max_to_fp16"), val = fp16(0x1.eap+5)]; + tensor clip_70_cast_fp16 = clip(alpha = self_attns_2_post_output_min_to_fp16, beta = self_attns_2_post_output_max_to_fp16, x = linear_29_cast_fp16)[name = string("clip_70_cast_fp16")]; + fp16 var_1360_to_fp16 = const()[name = string("op_1360_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1361_to_fp16 = const()[name = string("op_1361_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_71_cast_fp16 = clip(alpha = var_1360_to_fp16, beta = var_1361_to_fp16, x = clip_70_cast_fp16)[name = string("clip_71_cast_fp16")]; + string clip_71_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_71_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1363 = const()[name = string("op_1363"), val = fp32(-0x1p-1)]; + fp32 var_1367_promoted = const()[name = string("op_1367_promoted"), val = fp32(0x1p+1)]; + tensor clip_71_cast_fp16_to_fp32 = cast(dtype = 
clip_71_cast_fp16_to_fp32_dtype_0, x = clip_71_cast_fp16)[name = string("cast_431")]; + tensor var_1373 = pow(x = clip_71_cast_fp16_to_fp32, y = var_1367_promoted)[name = string("op_1373")]; + tensor var_1375_axes_0 = const()[name = string("op_1375_axes_0"), val = tensor([-1])]; + bool var_1375_keep_dims_0 = const()[name = string("op_1375_keep_dims_0"), val = bool(true)]; + tensor var_1375 = reduce_mean(axes = var_1375_axes_0, keep_dims = var_1375_keep_dims_0, x = var_1373)[name = string("op_1375")]; + string var_1375_to_fp16_dtype_0 = const()[name = string("op_1375_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1376_to_fp16 = const()[name = string("op_1376_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1375_to_fp16 = cast(dtype = var_1375_to_fp16_dtype_0, x = var_1375)[name = string("cast_430")]; + tensor mean_squared_43_cast_fp16 = add(x = var_1375_to_fp16, y = var_1376_to_fp16)[name = string("mean_squared_43_cast_fp16")]; + string mean_squared_43_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_43_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_43_cast_fp16_to_fp32 = cast(dtype = mean_squared_43_cast_fp16_to_fp32_dtype_0, x = mean_squared_43_cast_fp16)[name = string("cast_429")]; + tensor var_1378 = pow(x = mean_squared_43_cast_fp16_to_fp32, y = var_1363)[name = string("op_1378")]; + string var_1378_to_fp16_dtype_0 = const()[name = string("op_1378_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1378_to_fp16 = cast(dtype = var_1378_to_fp16_dtype_0, x = var_1378)[name = string("cast_428")]; + tensor normed_output_85_cast_fp16 = mul(x = clip_71_cast_fp16, y = var_1378_to_fp16)[name = string("normed_output_85_cast_fp16")]; + tensor const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31150720)))]; + tensor normed_output_87_cast_fp16 = mul(x = normed_output_85_cast_fp16, y = const_40_to_fp16)[name = 
string("normed_output_87_cast_fp16")]; + tensor hidden_states_265_cast_fp16 = add(x = normed_output_87_cast_fp16, y = hidden_states_239_cast_fp16)[name = string("hidden_states_265_cast_fp16")]; + string hidden_states_265_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_265_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1385 = const()[name = string("op_1385"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1386 = const()[name = string("op_1386"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_1398 = const()[name = string("op_1398"), val = fp32(-0x1p-1)]; + fp32 var_1394_promoted = const()[name = string("op_1394_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_265_cast_fp16_to_fp32 = cast(dtype = hidden_states_265_cast_fp16_to_fp32_dtype_0, x = hidden_states_265_cast_fp16)[name = string("cast_427")]; + tensor var_1406 = pow(x = hidden_states_265_cast_fp16_to_fp32, y = var_1394_promoted)[name = string("op_1406")]; + tensor var_1408_axes_0 = const()[name = string("op_1408_axes_0"), val = tensor([-1])]; + bool var_1408_keep_dims_0 = const()[name = string("op_1408_keep_dims_0"), val = bool(true)]; + tensor var_1408 = reduce_mean(axes = var_1408_axes_0, keep_dims = var_1408_keep_dims_0, x = var_1406)[name = string("op_1408")]; + string var_1408_to_fp16_dtype_0 = const()[name = string("op_1408_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1409_to_fp16 = const()[name = string("op_1409_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1408_to_fp16 = cast(dtype = var_1408_to_fp16_dtype_0, x = var_1408)[name = string("cast_426")]; + tensor mean_squared_45_cast_fp16 = add(x = var_1408_to_fp16, y = var_1409_to_fp16)[name = string("mean_squared_45_cast_fp16")]; + string mean_squared_45_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_45_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_45_cast_fp16_to_fp32 = cast(dtype = mean_squared_45_cast_fp16_to_fp32_dtype_0, x = mean_squared_45_cast_fp16)[name = 
string("cast_425")]; + tensor var_1411 = pow(x = mean_squared_45_cast_fp16_to_fp32, y = var_1398)[name = string("op_1411")]; + string var_1411_to_fp16_dtype_0 = const()[name = string("op_1411_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1411_to_fp16 = cast(dtype = var_1411_to_fp16_dtype_0, x = var_1411)[name = string("cast_424")]; + tensor normed_output_89_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = var_1411_to_fp16)[name = string("normed_output_89_cast_fp16")]; + tensor const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31152832)))]; + tensor normed_output_91_cast_fp16 = mul(x = normed_output_89_cast_fp16, y = const_41_to_fp16)[name = string("normed_output_91_cast_fp16")]; + fp16 lconv1ds_2_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_2_linear_start_input_min_to_fp16"), val = fp16(-0x1.7cp+3)]; + fp16 lconv1ds_2_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_2_linear_start_input_max_to_fp16"), val = fp16(0x1.78p+3)]; + tensor clip_72_cast_fp16 = clip(alpha = lconv1ds_2_linear_start_input_min_to_fp16, beta = lconv1ds_2_linear_start_input_max_to_fp16, x = normed_output_91_cast_fp16)[name = string("clip_72_cast_fp16")]; + tensor lconv1ds_2_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31154944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32203584))))[name = string("lconv1ds_2_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_30_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_2_linear_start_linear_weight_to_fp16_palettized, x = clip_72_cast_fp16)[name = string("linear_30_cast_fp16")]; + fp16 lconv1ds_2_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_2_linear_start_output_min_to_fp16"), val = 
fp16(-0x1.a8p+4)]; + fp16 lconv1ds_2_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_2_linear_start_output_max_to_fp16"), val = fp16(0x1.a4p+4)]; + tensor clip_73_cast_fp16 = clip(alpha = lconv1ds_2_linear_start_output_min_to_fp16, beta = lconv1ds_2_linear_start_output_max_to_fp16, x = linear_30_cast_fp16)[name = string("clip_73_cast_fp16")]; + int32 hidden_states_273_split_num_splits_0 = const()[name = string("hidden_states_273_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_273_split_axis_0 = const()[name = string("hidden_states_273_split_axis_0"), val = int32(-1)]; + tensor hidden_states_273_split_cast_fp16_0, tensor hidden_states_273_split_cast_fp16_1 = split(axis = hidden_states_273_split_axis_0, num_splits = hidden_states_273_split_num_splits_0, x = clip_73_cast_fp16)[name = string("hidden_states_273_split_cast_fp16")]; + tensor hidden_states_273_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_273_split_cast_fp16_1)[name = string("hidden_states_273_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_273_cast_fp16 = mul(x = hidden_states_273_split_cast_fp16_0, y = hidden_states_273_split_1_sigmoid_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor input_129_perm_0 = const()[name = string("input_129_perm_0"), val = tensor([0, 2, 1])]; + tensor input_131_pad_0 = const()[name = string("input_131_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_131_mode_0 = const()[name = string("input_131_mode_0"), val = string("constant")]; + fp16 const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = fp16(0x0p+0)]; + tensor input_129_cast_fp16 = transpose(perm = input_129_perm_0, x = hidden_states_273_cast_fp16)[name = string("transpose_55")]; + tensor input_131_cast_fp16 = pad(constant_val = const_42_to_fp16, mode = input_131_mode_0, pad = input_131_pad_0, x = input_129_cast_fp16)[name = string("input_131_cast_fp16")]; + string var_1437_pad_type_0 = const()[name = string("op_1437_pad_type_0"), val = 
string("valid")]; + int32 var_1437_groups_0 = const()[name = string("op_1437_groups_0"), val = int32(1024)]; + tensor var_1437_strides_0 = const()[name = string("op_1437_strides_0"), val = tensor([1])]; + tensor var_1437_pad_0 = const()[name = string("op_1437_pad_0"), val = tensor([0, 0])]; + tensor var_1437_dilations_0 = const()[name = string("op_1437_dilations_0"), val = tensor([1])]; + tensor lconv1ds_2_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32205696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32208320))))[name = string("lconv1ds_2_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_1437_cast_fp16 = conv(dilations = var_1437_dilations_0, groups = var_1437_groups_0, pad = var_1437_pad_0, pad_type = var_1437_pad_type_0, strides = var_1437_strides_0, weight = lconv1ds_2_depthwise_conv1d_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("op_1437_cast_fp16")]; + tensor hidden_states_275_perm_0 = const()[name = string("hidden_states_275_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_275_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_275_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_275_cast_fp16 = transpose(perm = hidden_states_275_perm_0, x = var_1437_cast_fp16)[name = string("transpose_54")]; + tensor hidden_states_275_cast_fp16_to_fp32 = cast(dtype = hidden_states_275_cast_fp16_to_fp32_dtype_0, x = hidden_states_275_cast_fp16)[name = string("cast_423")]; + tensor clip_74 = clip(alpha = var_1386, beta = var_1385, x = hidden_states_275_cast_fp16_to_fp32)[name = string("clip_74")]; + fp32 var_1394_promoted_1 = const()[name = string("op_1394_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1442 = pow(x = clip_74, y = var_1394_promoted_1)[name = string("op_1442")]; + tensor var_1444_axes_0 = const()[name = 
string("op_1444_axes_0"), val = tensor([-1])]; + bool var_1444_keep_dims_0 = const()[name = string("op_1444_keep_dims_0"), val = bool(true)]; + tensor var_1444 = reduce_mean(axes = var_1444_axes_0, keep_dims = var_1444_keep_dims_0, x = var_1442)[name = string("op_1444")]; + string var_1444_to_fp16_dtype_0 = const()[name = string("op_1444_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1445_to_fp16 = const()[name = string("op_1445_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1444_to_fp16 = cast(dtype = var_1444_to_fp16_dtype_0, x = var_1444)[name = string("cast_422")]; + tensor mean_squared_47_cast_fp16 = add(x = var_1444_to_fp16, y = var_1445_to_fp16)[name = string("mean_squared_47_cast_fp16")]; + string mean_squared_47_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_47_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_47_cast_fp16_to_fp32 = cast(dtype = mean_squared_47_cast_fp16_to_fp32_dtype_0, x = mean_squared_47_cast_fp16)[name = string("cast_421")]; + tensor var_1447 = pow(x = mean_squared_47_cast_fp16_to_fp32, y = var_1398)[name = string("op_1447")]; + string clip_74_to_fp16_dtype_0 = const()[name = string("clip_74_to_fp16_dtype_0"), val = string("fp16")]; + string var_1447_to_fp16_dtype_0 = const()[name = string("op_1447_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_74_to_fp16 = cast(dtype = clip_74_to_fp16_dtype_0, x = clip_74)[name = string("cast_419")]; + tensor var_1447_to_fp16 = cast(dtype = var_1447_to_fp16_dtype_0, x = var_1447)[name = string("cast_420")]; + tensor normed_output_93_cast_fp16 = mul(x = clip_74_to_fp16, y = var_1447_to_fp16)[name = string("normed_output_93_cast_fp16")]; + tensor const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32209408)))]; + tensor normed_output_95_cast_fp16 = mul(x = normed_output_93_cast_fp16, y = const_43_to_fp16)[name = string("normed_output_95_cast_fp16")]; + 
tensor hidden_states_281_cast_fp16 = silu(x = normed_output_95_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; + fp16 lconv1ds_2_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_2_linear_end_input_min_to_fp16"), val = fp16(-0x1.9p+2)]; + fp16 lconv1ds_2_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_2_linear_end_input_max_to_fp16"), val = fp16(0x1.8ep+2)]; + tensor clip_75_cast_fp16 = clip(alpha = lconv1ds_2_linear_end_input_min_to_fp16, beta = lconv1ds_2_linear_end_input_max_to_fp16, x = hidden_states_281_cast_fp16)[name = string("clip_75_cast_fp16")]; + tensor lconv1ds_2_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32211520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32735872))))[name = string("lconv1ds_2_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_31_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_2_linear_end_linear_weight_to_fp16_palettized, x = clip_75_cast_fp16)[name = string("linear_31_cast_fp16")]; + fp16 lconv1ds_2_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_2_linear_end_output_min_to_fp16"), val = fp16(-0x1.98p+2)]; + fp16 lconv1ds_2_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_2_linear_end_output_max_to_fp16"), val = fp16(0x1.94p+2)]; + tensor clip_76_cast_fp16 = clip(alpha = lconv1ds_2_linear_end_output_min_to_fp16, beta = lconv1ds_2_linear_end_output_max_to_fp16, x = linear_31_cast_fp16)[name = string("clip_76_cast_fp16")]; + tensor hidden_states_287_cast_fp16 = add(x = clip_76_cast_fp16, y = hidden_states_265_cast_fp16)[name = string("hidden_states_287_cast_fp16")]; + string hidden_states_287_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_287_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1471 = const()[name = string("op_1471"), val = 
fp32(-0x1p-1)]; + fp32 var_1472 = const()[name = string("op_1472"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1473 = const()[name = string("op_1473"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_287_cast_fp16_to_fp32 = cast(dtype = hidden_states_287_cast_fp16_to_fp32_dtype_0, x = hidden_states_287_cast_fp16)[name = string("cast_418")]; + tensor clip_77 = clip(alpha = var_1473, beta = var_1472, x = hidden_states_287_cast_fp16_to_fp32)[name = string("clip_77")]; + fp32 var_1467_promoted = const()[name = string("op_1467_promoted"), val = fp32(0x1p+1)]; + tensor var_1481 = pow(x = clip_77, y = var_1467_promoted)[name = string("op_1481")]; + tensor var_1483_axes_0 = const()[name = string("op_1483_axes_0"), val = tensor([-1])]; + bool var_1483_keep_dims_0 = const()[name = string("op_1483_keep_dims_0"), val = bool(true)]; + tensor var_1483 = reduce_mean(axes = var_1483_axes_0, keep_dims = var_1483_keep_dims_0, x = var_1481)[name = string("op_1483")]; + string var_1483_to_fp16_dtype_0 = const()[name = string("op_1483_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1483_to_fp16 = cast(dtype = var_1483_to_fp16_dtype_0, x = var_1483)[name = string("cast_417")]; + tensor mean_squared_49_cast_fp16 = add(x = var_1483_to_fp16, y = var_1484_to_fp16)[name = string("mean_squared_49_cast_fp16")]; + string mean_squared_49_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_49_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_49_cast_fp16_to_fp32 = cast(dtype = mean_squared_49_cast_fp16_to_fp32_dtype_0, x = mean_squared_49_cast_fp16)[name = string("cast_416")]; + tensor var_1486 = pow(x = mean_squared_49_cast_fp16_to_fp32, y = var_1471)[name = string("op_1486")]; + string clip_77_to_fp16_dtype_0 = const()[name = string("clip_77_to_fp16_dtype_0"), val = string("fp16")]; + string var_1486_to_fp16_dtype_0 = const()[name = 
string("op_1486_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_77_to_fp16 = cast(dtype = clip_77_to_fp16_dtype_0, x = clip_77)[name = string("cast_414")]; + tensor var_1486_to_fp16 = cast(dtype = var_1486_to_fp16_dtype_0, x = var_1486)[name = string("cast_415")]; + tensor normed_output_97_cast_fp16 = mul(x = clip_77_to_fp16, y = var_1486_to_fp16)[name = string("normed_output_97_cast_fp16")]; + tensor const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32736960)))]; + tensor normed_output_99_cast_fp16 = mul(x = normed_output_97_cast_fp16, y = const_44_to_fp16)[name = string("normed_output_99_cast_fp16")]; + fp16 feed_forward2s_2_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.7cp+3)]; + fp16 feed_forward2s_2_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.78p+3)]; + tensor clip_78_cast_fp16 = clip(alpha = feed_forward2s_2_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_2_ffw_layer_1_input_max_to_fp16, x = normed_output_99_cast_fp16)[name = string("clip_78_cast_fp16")]; + tensor feed_forward2s_2_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32739072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34836288))))[name = string("feed_forward2s_2_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_32_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_2_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_78_cast_fp16)[name = string("linear_32_cast_fp16")]; + fp16 feed_forward2s_2_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.b8p+4)]; + fp16 
feed_forward2s_2_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.b6p+4)]; + tensor clip_79_cast_fp16 = clip(alpha = feed_forward2s_2_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_2_ffw_layer_1_output_max_to_fp16, x = linear_32_cast_fp16)[name = string("clip_79_cast_fp16")]; + tensor hidden_states_297_cast_fp16 = silu(x = clip_79_cast_fp16)[name = string("hidden_states_297_cast_fp16")]; + fp16 feed_forward2s_2_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.28p+3)]; + fp16 feed_forward2s_2_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.26p+3)]; + tensor clip_80_cast_fp16 = clip(alpha = feed_forward2s_2_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_2_ffw_layer_2_input_max_to_fp16, x = hidden_states_297_cast_fp16)[name = string("clip_80_cast_fp16")]; + tensor feed_forward2s_2_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34840448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36937664))))[name = string("feed_forward2s_2_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_33_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_2_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_80_cast_fp16)[name = string("linear_33_cast_fp16")]; + fp16 feed_forward2s_2_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.38p+5)]; + fp16 feed_forward2s_2_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_2_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.36p+5)]; + tensor clip_81_cast_fp16 = clip(alpha = feed_forward2s_2_ffw_layer_2_output_min_to_fp16, beta = 
feed_forward2s_2_ffw_layer_2_output_max_to_fp16, x = linear_33_cast_fp16)[name = string("clip_81_cast_fp16")]; + string clip_81_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_81_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_81_cast_fp16_to_fp32 = cast(dtype = clip_81_cast_fp16_to_fp32_dtype_0, x = clip_81_cast_fp16)[name = string("cast_413")]; + tensor clip_82 = clip(alpha = var_1473, beta = var_1472, x = clip_81_cast_fp16_to_fp32)[name = string("clip_82")]; + fp32 var_1467_promoted_1 = const()[name = string("op_1467_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1513 = pow(x = clip_82, y = var_1467_promoted_1)[name = string("op_1513")]; + tensor var_1515_axes_0 = const()[name = string("op_1515_axes_0"), val = tensor([-1])]; + bool var_1515_keep_dims_0 = const()[name = string("op_1515_keep_dims_0"), val = bool(true)]; + tensor var_1515 = reduce_mean(axes = var_1515_axes_0, keep_dims = var_1515_keep_dims_0, x = var_1513)[name = string("op_1515")]; + string var_1515_to_fp16_dtype_0 = const()[name = string("op_1515_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1516_to_fp16 = const()[name = string("op_1516_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1515_to_fp16 = cast(dtype = var_1515_to_fp16_dtype_0, x = var_1515)[name = string("cast_412")]; + tensor mean_squared_51_cast_fp16 = add(x = var_1515_to_fp16, y = var_1516_to_fp16)[name = string("mean_squared_51_cast_fp16")]; + string mean_squared_51_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_51_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_51_cast_fp16_to_fp32 = cast(dtype = mean_squared_51_cast_fp16_to_fp32_dtype_0, x = mean_squared_51_cast_fp16)[name = string("cast_411")]; + tensor var_1518 = pow(x = mean_squared_51_cast_fp16_to_fp32, y = var_1471)[name = string("op_1518")]; + string clip_82_to_fp16_dtype_0 = const()[name = string("clip_82_to_fp16_dtype_0"), val = string("fp16")]; + string var_1518_to_fp16_dtype_0 = const()[name = 
string("op_1518_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_82_to_fp16 = cast(dtype = clip_82_to_fp16_dtype_0, x = clip_82)[name = string("cast_409")]; + tensor var_1518_to_fp16 = cast(dtype = var_1518_to_fp16_dtype_0, x = var_1518)[name = string("cast_410")]; + tensor normed_output_101_cast_fp16 = mul(x = clip_82_to_fp16, y = var_1518_to_fp16)[name = string("normed_output_101_cast_fp16")]; + tensor const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36938752)))]; + tensor normed_output_103_cast_fp16 = mul(x = normed_output_101_cast_fp16, y = const_45_to_fp16)[name = string("normed_output_103_cast_fp16")]; + fp16 var_1463_to_fp16 = const()[name = string("op_1463_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_309_cast_fp16 = mul(x = normed_output_103_cast_fp16, y = var_1463_to_fp16)[name = string("hidden_states_309_cast_fp16")]; + tensor hidden_states_311_cast_fp16 = add(x = hidden_states_309_cast_fp16, y = hidden_states_287_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; + fp16 var_1525_to_fp16 = const()[name = string("op_1525_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1526_to_fp16 = const()[name = string("op_1526_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_83_cast_fp16 = clip(alpha = var_1525_to_fp16, beta = var_1526_to_fp16, x = hidden_states_311_cast_fp16)[name = string("clip_83_cast_fp16")]; + string clip_83_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_83_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1528 = const()[name = string("op_1528"), val = fp32(-0x1p-1)]; + fp32 var_1532_promoted = const()[name = string("op_1532_promoted"), val = fp32(0x1p+1)]; + tensor clip_83_cast_fp16_to_fp32 = cast(dtype = clip_83_cast_fp16_to_fp32_dtype_0, x = clip_83_cast_fp16)[name = string("cast_408")]; + tensor var_1538 = pow(x = clip_83_cast_fp16_to_fp32, y = var_1532_promoted)[name = string("op_1538")]; + tensor 
var_1540_axes_0 = const()[name = string("op_1540_axes_0"), val = tensor([-1])]; + bool var_1540_keep_dims_0 = const()[name = string("op_1540_keep_dims_0"), val = bool(true)]; + tensor var_1540 = reduce_mean(axes = var_1540_axes_0, keep_dims = var_1540_keep_dims_0, x = var_1538)[name = string("op_1540")]; + string var_1540_to_fp16_dtype_0 = const()[name = string("op_1540_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1541_to_fp16 = const()[name = string("op_1541_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1540_to_fp16 = cast(dtype = var_1540_to_fp16_dtype_0, x = var_1540)[name = string("cast_407")]; + tensor mean_squared_53_cast_fp16 = add(x = var_1540_to_fp16, y = var_1541_to_fp16)[name = string("mean_squared_53_cast_fp16")]; + string mean_squared_53_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_53_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_53_cast_fp16_to_fp32 = cast(dtype = mean_squared_53_cast_fp16_to_fp32_dtype_0, x = mean_squared_53_cast_fp16)[name = string("cast_406")]; + tensor var_1543 = pow(x = mean_squared_53_cast_fp16_to_fp32, y = var_1528)[name = string("op_1543")]; + string var_1543_to_fp16_dtype_0 = const()[name = string("op_1543_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1543_to_fp16 = cast(dtype = var_1543_to_fp16_dtype_0, x = var_1543)[name = string("cast_405")]; + tensor normed_output_105_cast_fp16 = mul(x = clip_83_cast_fp16, y = var_1543_to_fp16)[name = string("normed_output_105_cast_fp16")]; + tensor const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36940864)))]; + tensor normed_output_107_cast_fp16 = mul(x = normed_output_105_cast_fp16, y = const_46_to_fp16)[name = string("normed_output_107_cast_fp16")]; + string normed_output_107_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_107_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1556 = 
const()[name = string("op_1556"), val = fp32(-0x1p-1)]; + fp32 var_1557 = const()[name = string("op_1557"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1558 = const()[name = string("op_1558"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_107_cast_fp16_to_fp32 = cast(dtype = normed_output_107_cast_fp16_to_fp32_dtype_0, x = normed_output_107_cast_fp16)[name = string("cast_404")]; + tensor clip_84 = clip(alpha = var_1558, beta = var_1557, x = normed_output_107_cast_fp16_to_fp32)[name = string("clip_84")]; + fp32 var_1552_promoted = const()[name = string("op_1552_promoted"), val = fp32(0x1p+1)]; + tensor var_1566 = pow(x = clip_84, y = var_1552_promoted)[name = string("op_1566")]; + tensor var_1568_axes_0 = const()[name = string("op_1568_axes_0"), val = tensor([-1])]; + bool var_1568_keep_dims_0 = const()[name = string("op_1568_keep_dims_0"), val = bool(true)]; + tensor var_1568 = reduce_mean(axes = var_1568_axes_0, keep_dims = var_1568_keep_dims_0, x = var_1566)[name = string("op_1568")]; + string var_1568_to_fp16_dtype_0 = const()[name = string("op_1568_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1569_to_fp16 = const()[name = string("op_1569_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1568_to_fp16 = cast(dtype = var_1568_to_fp16_dtype_0, x = var_1568)[name = string("cast_403")]; + tensor mean_squared_55_cast_fp16 = add(x = var_1568_to_fp16, y = var_1569_to_fp16)[name = string("mean_squared_55_cast_fp16")]; + string mean_squared_55_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_55_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_55_cast_fp16_to_fp32 = cast(dtype = mean_squared_55_cast_fp16_to_fp32_dtype_0, x = mean_squared_55_cast_fp16)[name = string("cast_402")]; + tensor var_1571 = pow(x = mean_squared_55_cast_fp16_to_fp32, y = var_1556)[name = string("op_1571")]; + string clip_84_to_fp16_dtype_0 = const()[name = string("clip_84_to_fp16_dtype_0"), val = string("fp16")]; + string var_1571_to_fp16_dtype_0 = 
const()[name = string("op_1571_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_84_to_fp16 = cast(dtype = clip_84_to_fp16_dtype_0, x = clip_84)[name = string("cast_400")]; + tensor var_1571_to_fp16 = cast(dtype = var_1571_to_fp16_dtype_0, x = var_1571)[name = string("cast_401")]; + tensor normed_output_109_cast_fp16 = mul(x = clip_84_to_fp16, y = var_1571_to_fp16)[name = string("normed_output_109_cast_fp16")]; + tensor const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36942976)))]; + tensor normed_output_111_cast_fp16 = mul(x = normed_output_109_cast_fp16, y = const_47_to_fp16)[name = string("normed_output_111_cast_fp16")]; + fp16 feed_forward1s_3_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.8ap+3)]; + fp16 feed_forward1s_3_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.88p+3)]; + tensor clip_85_cast_fp16 = clip(alpha = feed_forward1s_3_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_3_ffw_layer_1_input_max_to_fp16, x = normed_output_111_cast_fp16)[name = string("clip_85_cast_fp16")]; + tensor feed_forward1s_3_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36945088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39042304))))[name = string("feed_forward1s_3_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_3_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_85_cast_fp16)[name = string("linear_34_cast_fp16")]; + fp16 feed_forward1s_3_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.bp+4)]; 
+ fp16 feed_forward1s_3_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.acp+4)]; + tensor clip_86_cast_fp16 = clip(alpha = feed_forward1s_3_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_3_ffw_layer_1_output_max_to_fp16, x = linear_34_cast_fp16)[name = string("clip_86_cast_fp16")]; + tensor hidden_states_327_cast_fp16 = silu(x = clip_86_cast_fp16)[name = string("hidden_states_327_cast_fp16")]; + fp16 feed_forward1s_3_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.26p+3)]; + fp16 feed_forward1s_3_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.24p+3)]; + tensor clip_87_cast_fp16 = clip(alpha = feed_forward1s_3_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_3_ffw_layer_2_input_max_to_fp16, x = hidden_states_327_cast_fp16)[name = string("clip_87_cast_fp16")]; + tensor feed_forward1s_3_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39046464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41143680))))[name = string("feed_forward1s_3_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_3_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_87_cast_fp16)[name = string("linear_35_cast_fp16")]; + fp16 feed_forward1s_3_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.48p+5)]; + fp16 feed_forward1s_3_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_3_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.46p+5)]; + tensor clip_88_cast_fp16 = clip(alpha = feed_forward1s_3_ffw_layer_2_output_min_to_fp16, beta = 
feed_forward1s_3_ffw_layer_2_output_max_to_fp16, x = linear_35_cast_fp16)[name = string("clip_88_cast_fp16")]; + string clip_88_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_88_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_88_cast_fp16_to_fp32 = cast(dtype = clip_88_cast_fp16_to_fp32_dtype_0, x = clip_88_cast_fp16)[name = string("cast_399")]; + tensor clip_89 = clip(alpha = var_1558, beta = var_1557, x = clip_88_cast_fp16_to_fp32)[name = string("clip_89")]; + fp32 var_1552_promoted_1 = const()[name = string("op_1552_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1598 = pow(x = clip_89, y = var_1552_promoted_1)[name = string("op_1598")]; + tensor var_1600_axes_0 = const()[name = string("op_1600_axes_0"), val = tensor([-1])]; + bool var_1600_keep_dims_0 = const()[name = string("op_1600_keep_dims_0"), val = bool(true)]; + tensor var_1600 = reduce_mean(axes = var_1600_axes_0, keep_dims = var_1600_keep_dims_0, x = var_1598)[name = string("op_1600")]; + string var_1600_to_fp16_dtype_0 = const()[name = string("op_1600_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1601_to_fp16 = const()[name = string("op_1601_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1600_to_fp16 = cast(dtype = var_1600_to_fp16_dtype_0, x = var_1600)[name = string("cast_398")]; + tensor mean_squared_57_cast_fp16 = add(x = var_1600_to_fp16, y = var_1601_to_fp16)[name = string("mean_squared_57_cast_fp16")]; + string mean_squared_57_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_57_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_57_cast_fp16_to_fp32 = cast(dtype = mean_squared_57_cast_fp16_to_fp32_dtype_0, x = mean_squared_57_cast_fp16)[name = string("cast_397")]; + tensor var_1603 = pow(x = mean_squared_57_cast_fp16_to_fp32, y = var_1556)[name = string("op_1603")]; + string clip_89_to_fp16_dtype_0 = const()[name = string("clip_89_to_fp16_dtype_0"), val = string("fp16")]; + string var_1603_to_fp16_dtype_0 = const()[name = 
string("op_1603_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_89_to_fp16 = cast(dtype = clip_89_to_fp16_dtype_0, x = clip_89)[name = string("cast_395")]; + tensor var_1603_to_fp16 = cast(dtype = var_1603_to_fp16_dtype_0, x = var_1603)[name = string("cast_396")]; + tensor normed_output_113_cast_fp16 = mul(x = clip_89_to_fp16, y = var_1603_to_fp16)[name = string("normed_output_113_cast_fp16")]; + tensor const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41144768)))]; + tensor normed_output_115_cast_fp16 = mul(x = normed_output_113_cast_fp16, y = const_48_to_fp16)[name = string("normed_output_115_cast_fp16")]; + fp16 var_1548_to_fp16 = const()[name = string("op_1548_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_339_cast_fp16 = mul(x = normed_output_115_cast_fp16, y = var_1548_to_fp16)[name = string("hidden_states_339_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = add(x = hidden_states_339_cast_fp16, y = normed_output_107_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + fp16 var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_90_cast_fp16 = clip(alpha = var_1610_to_fp16, beta = var_1611_to_fp16, x = hidden_states_341_cast_fp16)[name = string("clip_90_cast_fp16")]; + string clip_90_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_90_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1613 = const()[name = string("op_1613"), val = fp32(-0x1p-1)]; + fp32 var_1617_promoted = const()[name = string("op_1617_promoted"), val = fp32(0x1p+1)]; + tensor clip_90_cast_fp16_to_fp32 = cast(dtype = clip_90_cast_fp16_to_fp32_dtype_0, x = clip_90_cast_fp16)[name = string("cast_394")]; + tensor var_1623 = pow(x = clip_90_cast_fp16_to_fp32, y = var_1617_promoted)[name = string("op_1623")]; + tensor 
var_1625_axes_0 = const()[name = string("op_1625_axes_0"), val = tensor([-1])]; + bool var_1625_keep_dims_0 = const()[name = string("op_1625_keep_dims_0"), val = bool(true)]; + tensor var_1625 = reduce_mean(axes = var_1625_axes_0, keep_dims = var_1625_keep_dims_0, x = var_1623)[name = string("op_1625")]; + string var_1625_to_fp16_dtype_0 = const()[name = string("op_1625_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1626_to_fp16 = const()[name = string("op_1626_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1625_to_fp16 = cast(dtype = var_1625_to_fp16_dtype_0, x = var_1625)[name = string("cast_393")]; + tensor mean_squared_59_cast_fp16 = add(x = var_1625_to_fp16, y = var_1626_to_fp16)[name = string("mean_squared_59_cast_fp16")]; + string mean_squared_59_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_59_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_59_cast_fp16_to_fp32 = cast(dtype = mean_squared_59_cast_fp16_to_fp32_dtype_0, x = mean_squared_59_cast_fp16)[name = string("cast_392")]; + tensor var_1628 = pow(x = mean_squared_59_cast_fp16_to_fp32, y = var_1613)[name = string("op_1628")]; + string var_1628_to_fp16_dtype_0 = const()[name = string("op_1628_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1628_to_fp16 = cast(dtype = var_1628_to_fp16_dtype_0, x = var_1628)[name = string("cast_391")]; + tensor normed_output_117_cast_fp16 = mul(x = clip_90_cast_fp16, y = var_1628_to_fp16)[name = string("normed_output_117_cast_fp16")]; + tensor const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41146880)))]; + tensor normed_output_119_cast_fp16 = mul(x = normed_output_117_cast_fp16, y = const_49_to_fp16)[name = string("normed_output_119_cast_fp16")]; + int32 var_1634 = const()[name = string("op_1634"), val = int32(-1)]; + fp32 var_1635 = const()[name = string("op_1635"), val = fp32(-0x1.dcd65p+29)]; + fp16 
self_attns_3_q_proj_input_min_to_fp16 = const()[name = string("self_attns_3_q_proj_input_min_to_fp16"), val = fp16(-0x1.7ap+3)]; + fp16 self_attns_3_q_proj_input_max_to_fp16 = const()[name = string("self_attns_3_q_proj_input_max_to_fp16"), val = fp16(0x1.78p+3)]; + tensor clip_91_cast_fp16 = clip(alpha = self_attns_3_q_proj_input_min_to_fp16, beta = self_attns_3_q_proj_input_max_to_fp16, x = normed_output_119_cast_fp16)[name = string("clip_91_cast_fp16")]; + tensor self_attns_3_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41148992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41673344))))[name = string("self_attns_3_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_3_q_proj_linear_weight_to_fp16_palettized, x = clip_91_cast_fp16)[name = string("linear_36_cast_fp16")]; + fp16 self_attns_3_q_proj_output_min_to_fp16 = const()[name = string("self_attns_3_q_proj_output_min_to_fp16"), val = fp16(-0x1.2cp+4)]; + fp16 self_attns_3_q_proj_output_max_to_fp16 = const()[name = string("self_attns_3_q_proj_output_max_to_fp16"), val = fp16(0x1.2ap+4)]; + tensor clip_92_cast_fp16 = clip(alpha = self_attns_3_q_proj_output_min_to_fp16, beta = self_attns_3_q_proj_output_max_to_fp16, x = linear_36_cast_fp16)[name = string("clip_92_cast_fp16")]; + tensor var_1679 = const()[name = string("op_1679"), val = tensor([1, 50, 8, 128])]; + tensor q_7_cast_fp16 = reshape(shape = var_1679, x = clip_92_cast_fp16)[name = string("q_7_cast_fp16")]; + tensor self_attns_3_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41674432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42198784))))[name = 
string("self_attns_3_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_37_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_3_k_proj_linear_weight_to_fp16_palettized, x = clip_91_cast_fp16)[name = string("linear_37_cast_fp16")]; + fp16 self_attns_3_k_proj_output_min_to_fp16 = const()[name = string("self_attns_3_k_proj_output_min_to_fp16"), val = fp16(-0x1.2cp+4)]; + fp16 self_attns_3_k_proj_output_max_to_fp16 = const()[name = string("self_attns_3_k_proj_output_max_to_fp16"), val = fp16(0x1.2ap+4)]; + tensor clip_94_cast_fp16 = clip(alpha = self_attns_3_k_proj_output_min_to_fp16, beta = self_attns_3_k_proj_output_max_to_fp16, x = linear_37_cast_fp16)[name = string("clip_94_cast_fp16")]; + tensor var_1691 = const()[name = string("op_1691"), val = tensor([1, 50, 8, 128])]; + tensor k_7_cast_fp16 = reshape(shape = var_1691, x = clip_94_cast_fp16)[name = string("k_7_cast_fp16")]; + tensor self_attns_3_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42199872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42724224))))[name = string("self_attns_3_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_3_v_proj_linear_weight_to_fp16_palettized, x = clip_91_cast_fp16)[name = string("linear_38_cast_fp16")]; + fp16 self_attns_3_v_proj_output_min_to_fp16 = const()[name = string("self_attns_3_v_proj_output_min_to_fp16"), val = fp16(-0x1.2cp+4)]; + fp16 self_attns_3_v_proj_output_max_to_fp16 = const()[name = string("self_attns_3_v_proj_output_max_to_fp16"), val = fp16(0x1.2ap+4)]; + tensor clip_96_cast_fp16 = clip(alpha = self_attns_3_v_proj_output_min_to_fp16, beta = self_attns_3_v_proj_output_max_to_fp16, x = linear_38_cast_fp16)[name = string("clip_96_cast_fp16")]; + tensor var_1703 = const()[name = string("op_1703"), 
val = tensor([1, 50, 8, 128])]; + tensor input_159_cast_fp16 = reshape(shape = var_1703, x = clip_96_cast_fp16)[name = string("input_159_cast_fp16")]; + fp16 var_1705_to_fp16 = const()[name = string("op_1705_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_1706_cast_fp16 = mul(x = q_7_cast_fp16, y = var_1705_to_fp16)[name = string("op_1706_cast_fp16")]; + tensor var_1707_to_fp16 = const()[name = string("op_1707_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42725312)))]; + tensor input_155_cast_fp16 = mul(x = var_1706_cast_fp16, y = var_1707_to_fp16)[name = string("input_155_cast_fp16")]; + fp16 var_1709_to_fp16 = const()[name = string("op_1709_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_157_cast_fp16 = mul(x = k_7_cast_fp16, y = var_1709_to_fp16)[name = string("input_157_cast_fp16")]; + tensor q_padded_7_pad_0 = const()[name = string("q_padded_7_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_7_mode_0 = const()[name = string("q_padded_7_mode_0"), val = string("constant")]; + fp16 const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_7_cast_fp16 = pad(constant_val = const_50_to_fp16, mode = q_padded_7_mode_0, pad = q_padded_7_pad_0, x = input_155_cast_fp16)[name = string("q_padded_7_cast_fp16")]; + tensor var_1713 = const()[name = string("op_1713"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_7_cast_fp16 = reshape(shape = var_1713, x = q_padded_7_cast_fp16)[name = string("q_blocks_7_cast_fp16")]; + tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; + fp16 const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_7_cast_fp16 = pad(constant_val = const_51_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = 
input_157_cast_fp16)[name = string("k_padded_7_cast_fp16")]; + tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; + fp16 const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_7_cast_fp16 = pad(constant_val = const_52_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = input_159_cast_fp16)[name = string("v_padded_7_cast_fp16")]; + tensor var_1720_begin_0 = const()[name = string("op_1720_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1720_end_0 = const()[name = string("op_1720_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_1720_end_mask_0 = const()[name = string("op_1720_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1720_cast_fp16 = slice_by_index(begin = var_1720_begin_0, end = var_1720_end_0, end_mask = var_1720_end_mask_0, x = k_padded_7_cast_fp16)[name = string("op_1720_cast_fp16")]; + tensor var_1722_begin_0 = const()[name = string("op_1722_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_1722_end_0 = const()[name = string("op_1722_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_1722_end_mask_0 = const()[name = string("op_1722_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1722_cast_fp16 = slice_by_index(begin = var_1722_begin_0, end = var_1722_end_0, end_mask = var_1722_end_mask_0, x = k_padded_7_cast_fp16)[name = string("op_1722_cast_fp16")]; + tensor var_1724_begin_0 = const()[name = string("op_1724_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_1724_end_0 = const()[name = string("op_1724_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_1724_end_mask_0 = const()[name = string("op_1724_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1724_cast_fp16 = slice_by_index(begin = var_1724_begin_0, end = var_1724_end_0, end_mask = var_1724_end_mask_0, x 
= k_padded_7_cast_fp16)[name = string("op_1724_cast_fp16")]; + tensor var_1726_begin_0 = const()[name = string("op_1726_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_1726_end_0 = const()[name = string("op_1726_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_1726_end_mask_0 = const()[name = string("op_1726_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1726_cast_fp16 = slice_by_index(begin = var_1726_begin_0, end = var_1726_end_0, end_mask = var_1726_end_mask_0, x = k_padded_7_cast_fp16)[name = string("op_1726_cast_fp16")]; + tensor var_1728_begin_0 = const()[name = string("op_1728_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_1728_end_0 = const()[name = string("op_1728_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_1728_end_mask_0 = const()[name = string("op_1728_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1728_cast_fp16 = slice_by_index(begin = var_1728_begin_0, end = var_1728_end_0, end_mask = var_1728_end_mask_0, x = k_padded_7_cast_fp16)[name = string("op_1728_cast_fp16")]; + int32 k_blocks_7_axis_0 = const()[name = string("k_blocks_7_axis_0"), val = int32(1)]; + tensor k_blocks_7_cast_fp16 = stack(axis = k_blocks_7_axis_0, values = (var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16))[name = string("k_blocks_7_cast_fp16")]; + tensor var_1732_begin_0 = const()[name = string("op_1732_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1732_end_0 = const()[name = string("op_1732_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_1732_end_mask_0 = const()[name = string("op_1732_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1732_cast_fp16 = slice_by_index(begin = var_1732_begin_0, end = var_1732_end_0, end_mask = var_1732_end_mask_0, x = v_padded_7_cast_fp16)[name = string("op_1732_cast_fp16")]; + tensor var_1734_begin_0 = const()[name = string("op_1734_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_1734_end_0 
= const()[name = string("op_1734_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_1734_end_mask_0 = const()[name = string("op_1734_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1734_cast_fp16 = slice_by_index(begin = var_1734_begin_0, end = var_1734_end_0, end_mask = var_1734_end_mask_0, x = v_padded_7_cast_fp16)[name = string("op_1734_cast_fp16")]; + tensor var_1736_begin_0 = const()[name = string("op_1736_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_1736_end_0 = const()[name = string("op_1736_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_1736_end_mask_0 = const()[name = string("op_1736_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1736_cast_fp16 = slice_by_index(begin = var_1736_begin_0, end = var_1736_end_0, end_mask = var_1736_end_mask_0, x = v_padded_7_cast_fp16)[name = string("op_1736_cast_fp16")]; + tensor var_1738_begin_0 = const()[name = string("op_1738_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_1738_end_0 = const()[name = string("op_1738_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_1738_end_mask_0 = const()[name = string("op_1738_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1738_cast_fp16 = slice_by_index(begin = var_1738_begin_0, end = var_1738_end_0, end_mask = var_1738_end_mask_0, x = v_padded_7_cast_fp16)[name = string("op_1738_cast_fp16")]; + tensor var_1740_begin_0 = const()[name = string("op_1740_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_1740_end_0 = const()[name = string("op_1740_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_1740_end_mask_0 = const()[name = string("op_1740_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1740_cast_fp16 = slice_by_index(begin = var_1740_begin_0, end = var_1740_end_0, end_mask = var_1740_end_mask_0, x = v_padded_7_cast_fp16)[name = string("op_1740_cast_fp16")]; + int32 v_blocks_7_axis_0 = const()[name = string("v_blocks_7_axis_0"), val = int32(1)]; + tensor 
v_blocks_7_cast_fp16 = stack(axis = v_blocks_7_axis_0, values = (var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16))[name = string("v_blocks_7_cast_fp16")]; + tensor var_1748 = const()[name = string("op_1748"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_1750 = const()[name = string("op_1750"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_7_transpose_x_0 = const()[name = string("matrix_ac_7_transpose_x_0"), val = bool(false)]; + bool matrix_ac_7_transpose_y_0 = const()[name = string("matrix_ac_7_transpose_y_0"), val = bool(false)]; + tensor queries_7_cast_fp16 = transpose(perm = var_1748, x = q_blocks_7_cast_fp16)[name = string("transpose_52")]; + tensor keys_t_7_cast_fp16 = transpose(perm = var_1750, x = k_blocks_7_cast_fp16)[name = string("transpose_53")]; + tensor matrix_ac_7_cast_fp16 = matmul(transpose_x = matrix_ac_7_transpose_x_0, transpose_y = matrix_ac_7_transpose_y_0, x = queries_7_cast_fp16, y = keys_t_7_cast_fp16)[name = string("matrix_ac_7_cast_fp16")]; + tensor var_1753 = const()[name = string("op_1753"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_7_cast_fp16 = reshape(shape = var_1753, x = queries_7_cast_fp16)[name = string("q_flat_7_cast_fp16")]; + bool matrix_bd_31_transpose_x_0 = const()[name = string("matrix_bd_31_transpose_x_0"), val = bool(false)]; + bool matrix_bd_31_transpose_y_0 = const()[name = string("matrix_bd_31_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_7_to_fp16 = const()[name = string("rel_k_t_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42725632)))]; + tensor matrix_bd_31_cast_fp16 = matmul(transpose_x = matrix_bd_31_transpose_x_0, transpose_y = matrix_bd_31_transpose_y_0, x = q_flat_7_cast_fp16, y = rel_k_t_7_to_fp16)[name = string("matrix_bd_31_cast_fp16")]; + tensor var_1758 = const()[name = string("op_1758"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_161_cast_fp16 = reshape(shape = var_1758, x = 
matrix_bd_31_cast_fp16)[name = string("input_161_cast_fp16")]; + tensor matrix_bd_33_pad_0 = const()[name = string("matrix_bd_33_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42752320)))]; + string matrix_bd_33_mode_0 = const()[name = string("matrix_bd_33_mode_0"), val = string("constant")]; + fp16 const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_33_cast_fp16 = pad(constant_val = const_54_to_fp16, mode = matrix_bd_33_mode_0, pad = matrix_bd_33_pad_0, x = input_161_cast_fp16)[name = string("matrix_bd_33_cast_fp16")]; + tensor var_1762 = const()[name = string("op_1762"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_35_cast_fp16 = reshape(shape = var_1762, x = matrix_bd_33_cast_fp16)[name = string("matrix_bd_35_cast_fp16")]; + tensor matrix_bd_37_begin_0 = const()[name = string("matrix_bd_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_37_end_0 = const()[name = string("matrix_bd_37_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_37_end_mask_0 = const()[name = string("matrix_bd_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_37_cast_fp16 = slice_by_index(begin = matrix_bd_37_begin_0, end = matrix_bd_37_end_0, end_mask = matrix_bd_37_end_mask_0, x = matrix_bd_35_cast_fp16)[name = string("matrix_bd_37_cast_fp16")]; + tensor var_1768 = const()[name = string("op_1768"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_39_cast_fp16 = reshape(shape = var_1768, x = matrix_bd_37_cast_fp16)[name = string("matrix_bd_39_cast_fp16")]; + tensor attn_19_cast_fp16 = add(x = matrix_ac_7_cast_fp16, y = matrix_bd_39_cast_fp16)[name = string("attn_19_cast_fp16")]; + fp16 _inversed_1771_y_0_to_fp16 = const()[name = string("_inversed_1771_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_1771_cast_fp16 = mul(x = attn_19_cast_fp16, y = _inversed_1771_y_0_to_fp16)[name = string("_inversed_1771_cast_fp16")]; + string 
_inversed_1771_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_1771_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_1771_cast_fp16_to_fp32 = cast(dtype = _inversed_1771_cast_fp16_to_fp32_dtype_0, x = _inversed_1771_cast_fp16)[name = string("cast_390")]; + tensor var_1772 = tanh(x = _inversed_1771_cast_fp16_to_fp32)[name = string("op_1772")]; + string var_1772_to_fp16_dtype_0 = const()[name = string("op_1772_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_3_softcap_to_fp16 = const()[name = string("self_attns_3_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_1772_to_fp16 = cast(dtype = var_1772_to_fp16_dtype_0, x = var_1772)[name = string("cast_389")]; + tensor attn_21_cast_fp16 = mul(x = var_1772_to_fp16, y = self_attns_3_softcap_to_fp16)[name = string("attn_21_cast_fp16")]; + string attn_21_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_21_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_21_cast_fp16_to_fp32 = cast(dtype = attn_21_cast_fp16_to_fp32_dtype_0, x = attn_21_cast_fp16)[name = string("cast_388")]; + tensor input_163 = select(a = var_1635, b = attn_21_cast_fp16_to_fp32, cond = var_460)[name = string("input_163")]; + tensor var_1776 = softmax(axis = var_1634, x = input_163)[name = string("op_1776")]; + tensor var_1778 = const()[name = string("op_1778"), val = tensor([0, 3, 1, -3, -1])]; + bool out_19_transpose_x_0 = const()[name = string("out_19_transpose_x_0"), val = bool(false)]; + bool out_19_transpose_y_0 = const()[name = string("out_19_transpose_y_0"), val = bool(false)]; + string var_1776_to_fp16_dtype_0 = const()[name = string("op_1776_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_7_cast_fp16 = transpose(perm = var_1778, x = v_blocks_7_cast_fp16)[name = string("transpose_51")]; + tensor var_1776_to_fp16 = cast(dtype = var_1776_to_fp16_dtype_0, x = var_1776)[name = string("cast_387")]; + tensor out_19_cast_fp16 = matmul(transpose_x = out_19_transpose_x_0, 
transpose_y = out_19_transpose_y_0, x = var_1776_to_fp16, y = values_t_7_cast_fp16)[name = string("out_19_cast_fp16")]; + tensor var_1781 = const()[name = string("op_1781"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_1783 = const()[name = string("op_1783"), val = tensor([1, 60, 1024])]; + tensor var_1782_cast_fp16 = transpose(perm = var_1781, x = out_19_cast_fp16)[name = string("transpose_50")]; + tensor out_21_cast_fp16 = reshape(shape = var_1783, x = var_1782_cast_fp16)[name = string("out_21_cast_fp16")]; + tensor var_1786_begin_0 = const()[name = string("op_1786_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1786_end_0 = const()[name = string("op_1786_end_0"), val = tensor([1, 50, 1024])]; + tensor var_1786_end_mask_0 = const()[name = string("op_1786_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1786_cast_fp16 = slice_by_index(begin = var_1786_begin_0, end = var_1786_end_0, end_mask = var_1786_end_mask_0, x = out_21_cast_fp16)[name = string("op_1786_cast_fp16")]; + fp16 self_attns_3_post_input_min_to_fp16 = const()[name = string("self_attns_3_post_input_min_to_fp16"), val = fp16(-0x1.08p+4)]; + fp16 self_attns_3_post_input_max_to_fp16 = const()[name = string("self_attns_3_post_input_max_to_fp16"), val = fp16(0x1.06p+4)]; + tensor clip_97_cast_fp16 = clip(alpha = self_attns_3_post_input_min_to_fp16, beta = self_attns_3_post_input_max_to_fp16, x = var_1786_cast_fp16)[name = string("clip_97_cast_fp16")]; + tensor self_attns_3_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42752448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43276800))))[name = string("self_attns_3_post_linear_weight_to_fp16_palettized")]; + tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_3_post_linear_weight_to_fp16_palettized, x = clip_97_cast_fp16)[name = 
string("linear_40_cast_fp16")]; + fp16 self_attns_3_post_output_min_to_fp16 = const()[name = string("self_attns_3_post_output_min_to_fp16"), val = fp16(-0x1.72p+5)]; + fp16 self_attns_3_post_output_max_to_fp16 = const()[name = string("self_attns_3_post_output_max_to_fp16"), val = fp16(0x1.7p+5)]; + tensor clip_98_cast_fp16 = clip(alpha = self_attns_3_post_output_min_to_fp16, beta = self_attns_3_post_output_max_to_fp16, x = linear_40_cast_fp16)[name = string("clip_98_cast_fp16")]; + fp16 var_1798_to_fp16 = const()[name = string("op_1798_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1799_to_fp16 = const()[name = string("op_1799_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_99_cast_fp16 = clip(alpha = var_1798_to_fp16, beta = var_1799_to_fp16, x = clip_98_cast_fp16)[name = string("clip_99_cast_fp16")]; + string clip_99_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_99_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1801 = const()[name = string("op_1801"), val = fp32(-0x1p-1)]; + fp32 var_1805_promoted = const()[name = string("op_1805_promoted"), val = fp32(0x1p+1)]; + tensor clip_99_cast_fp16_to_fp32 = cast(dtype = clip_99_cast_fp16_to_fp32_dtype_0, x = clip_99_cast_fp16)[name = string("cast_386")]; + tensor var_1811 = pow(x = clip_99_cast_fp16_to_fp32, y = var_1805_promoted)[name = string("op_1811")]; + tensor var_1813_axes_0 = const()[name = string("op_1813_axes_0"), val = tensor([-1])]; + bool var_1813_keep_dims_0 = const()[name = string("op_1813_keep_dims_0"), val = bool(true)]; + tensor var_1813 = reduce_mean(axes = var_1813_axes_0, keep_dims = var_1813_keep_dims_0, x = var_1811)[name = string("op_1813")]; + string var_1813_to_fp16_dtype_0 = const()[name = string("op_1813_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1814_to_fp16 = const()[name = string("op_1814_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1813_to_fp16 = cast(dtype = var_1813_to_fp16_dtype_0, x = var_1813)[name = string("cast_385")]; + tensor 
mean_squared_61_cast_fp16 = add(x = var_1813_to_fp16, y = var_1814_to_fp16)[name = string("mean_squared_61_cast_fp16")]; + string mean_squared_61_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_61_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_61_cast_fp16_to_fp32 = cast(dtype = mean_squared_61_cast_fp16_to_fp32_dtype_0, x = mean_squared_61_cast_fp16)[name = string("cast_384")]; + tensor var_1816 = pow(x = mean_squared_61_cast_fp16_to_fp32, y = var_1801)[name = string("op_1816")]; + string var_1816_to_fp16_dtype_0 = const()[name = string("op_1816_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1816_to_fp16 = cast(dtype = var_1816_to_fp16_dtype_0, x = var_1816)[name = string("cast_383")]; + tensor normed_output_121_cast_fp16 = mul(x = clip_99_cast_fp16, y = var_1816_to_fp16)[name = string("normed_output_121_cast_fp16")]; + tensor const_55_to_fp16 = const()[name = string("const_55_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43277888)))]; + tensor normed_output_123_cast_fp16 = mul(x = normed_output_121_cast_fp16, y = const_55_to_fp16)[name = string("normed_output_123_cast_fp16")]; + tensor hidden_states_367_cast_fp16 = add(x = normed_output_123_cast_fp16, y = hidden_states_341_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + string hidden_states_367_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_367_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1823 = const()[name = string("op_1823"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1824 = const()[name = string("op_1824"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_1836 = const()[name = string("op_1836"), val = fp32(-0x1p-1)]; + fp32 var_1832_promoted = const()[name = string("op_1832_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_367_cast_fp16_to_fp32 = cast(dtype = hidden_states_367_cast_fp16_to_fp32_dtype_0, x = hidden_states_367_cast_fp16)[name = string("cast_382")]; + 
tensor var_1844 = pow(x = hidden_states_367_cast_fp16_to_fp32, y = var_1832_promoted)[name = string("op_1844")]; + tensor var_1846_axes_0 = const()[name = string("op_1846_axes_0"), val = tensor([-1])]; + bool var_1846_keep_dims_0 = const()[name = string("op_1846_keep_dims_0"), val = bool(true)]; + tensor var_1846 = reduce_mean(axes = var_1846_axes_0, keep_dims = var_1846_keep_dims_0, x = var_1844)[name = string("op_1846")]; + string var_1846_to_fp16_dtype_0 = const()[name = string("op_1846_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1847_to_fp16 = const()[name = string("op_1847_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1846_to_fp16 = cast(dtype = var_1846_to_fp16_dtype_0, x = var_1846)[name = string("cast_381")]; + tensor mean_squared_63_cast_fp16 = add(x = var_1846_to_fp16, y = var_1847_to_fp16)[name = string("mean_squared_63_cast_fp16")]; + string mean_squared_63_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_63_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_63_cast_fp16_to_fp32 = cast(dtype = mean_squared_63_cast_fp16_to_fp32_dtype_0, x = mean_squared_63_cast_fp16)[name = string("cast_380")]; + tensor var_1849 = pow(x = mean_squared_63_cast_fp16_to_fp32, y = var_1836)[name = string("op_1849")]; + string var_1849_to_fp16_dtype_0 = const()[name = string("op_1849_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1849_to_fp16 = cast(dtype = var_1849_to_fp16_dtype_0, x = var_1849)[name = string("cast_379")]; + tensor normed_output_125_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = var_1849_to_fp16)[name = string("normed_output_125_cast_fp16")]; + tensor const_56_to_fp16 = const()[name = string("const_56_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43280000)))]; + tensor normed_output_127_cast_fp16 = mul(x = normed_output_125_cast_fp16, y = const_56_to_fp16)[name = string("normed_output_127_cast_fp16")]; + fp16 
lconv1ds_3_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_3_linear_start_input_min_to_fp16"), val = fp16(-0x1.68p+3)]; + fp16 lconv1ds_3_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_3_linear_start_input_max_to_fp16"), val = fp16(0x1.66p+3)]; + tensor clip_100_cast_fp16 = clip(alpha = lconv1ds_3_linear_start_input_min_to_fp16, beta = lconv1ds_3_linear_start_input_max_to_fp16, x = normed_output_127_cast_fp16)[name = string("clip_100_cast_fp16")]; + tensor lconv1ds_3_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43282112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44330752))))[name = string("lconv1ds_3_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_41_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_3_linear_start_linear_weight_to_fp16_palettized, x = clip_100_cast_fp16)[name = string("linear_41_cast_fp16")]; + fp16 lconv1ds_3_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_3_linear_start_output_min_to_fp16"), val = fp16(-0x1.9p+4)]; + fp16 lconv1ds_3_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_3_linear_start_output_max_to_fp16"), val = fp16(0x1.8cp+4)]; + tensor clip_101_cast_fp16 = clip(alpha = lconv1ds_3_linear_start_output_min_to_fp16, beta = lconv1ds_3_linear_start_output_max_to_fp16, x = linear_41_cast_fp16)[name = string("clip_101_cast_fp16")]; + int32 hidden_states_375_split_num_splits_0 = const()[name = string("hidden_states_375_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_375_split_axis_0 = const()[name = string("hidden_states_375_split_axis_0"), val = int32(-1)]; + tensor hidden_states_375_split_cast_fp16_0, tensor hidden_states_375_split_cast_fp16_1 = split(axis = hidden_states_375_split_axis_0, num_splits = hidden_states_375_split_num_splits_0, x = 
clip_101_cast_fp16)[name = string("hidden_states_375_split_cast_fp16")]; + tensor hidden_states_375_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_375_split_cast_fp16_1)[name = string("hidden_states_375_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_375_cast_fp16 = mul(x = hidden_states_375_split_cast_fp16_0, y = hidden_states_375_split_1_sigmoid_cast_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor input_171_perm_0 = const()[name = string("input_171_perm_0"), val = tensor([0, 2, 1])]; + tensor input_173_pad_0 = const()[name = string("input_173_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_173_mode_0 = const()[name = string("input_173_mode_0"), val = string("constant")]; + fp16 const_57_to_fp16 = const()[name = string("const_57_to_fp16"), val = fp16(0x0p+0)]; + tensor input_171_cast_fp16 = transpose(perm = input_171_perm_0, x = hidden_states_375_cast_fp16)[name = string("transpose_49")]; + tensor input_173_cast_fp16 = pad(constant_val = const_57_to_fp16, mode = input_173_mode_0, pad = input_173_pad_0, x = input_171_cast_fp16)[name = string("input_173_cast_fp16")]; + string var_1875_pad_type_0 = const()[name = string("op_1875_pad_type_0"), val = string("valid")]; + int32 var_1875_groups_0 = const()[name = string("op_1875_groups_0"), val = int32(1024)]; + tensor var_1875_strides_0 = const()[name = string("op_1875_strides_0"), val = tensor([1])]; + tensor var_1875_pad_0 = const()[name = string("op_1875_pad_0"), val = tensor([0, 0])]; + tensor var_1875_dilations_0 = const()[name = string("op_1875_dilations_0"), val = tensor([1])]; + tensor lconv1ds_3_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44332864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44335488))))[name = string("lconv1ds_3_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_1875_cast_fp16 = 
conv(dilations = var_1875_dilations_0, groups = var_1875_groups_0, pad = var_1875_pad_0, pad_type = var_1875_pad_type_0, strides = var_1875_strides_0, weight = lconv1ds_3_depthwise_conv1d_weight_to_fp16_palettized, x = input_173_cast_fp16)[name = string("op_1875_cast_fp16")]; + tensor hidden_states_377_perm_0 = const()[name = string("hidden_states_377_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_377_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_377_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_377_cast_fp16 = transpose(perm = hidden_states_377_perm_0, x = var_1875_cast_fp16)[name = string("transpose_48")]; + tensor hidden_states_377_cast_fp16_to_fp32 = cast(dtype = hidden_states_377_cast_fp16_to_fp32_dtype_0, x = hidden_states_377_cast_fp16)[name = string("cast_378")]; + tensor clip_102 = clip(alpha = var_1824, beta = var_1823, x = hidden_states_377_cast_fp16_to_fp32)[name = string("clip_102")]; + fp32 var_1832_promoted_1 = const()[name = string("op_1832_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1880 = pow(x = clip_102, y = var_1832_promoted_1)[name = string("op_1880")]; + tensor var_1882_axes_0 = const()[name = string("op_1882_axes_0"), val = tensor([-1])]; + bool var_1882_keep_dims_0 = const()[name = string("op_1882_keep_dims_0"), val = bool(true)]; + tensor var_1882 = reduce_mean(axes = var_1882_axes_0, keep_dims = var_1882_keep_dims_0, x = var_1880)[name = string("op_1882")]; + string var_1882_to_fp16_dtype_0 = const()[name = string("op_1882_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1883_to_fp16 = const()[name = string("op_1883_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1882_to_fp16 = cast(dtype = var_1882_to_fp16_dtype_0, x = var_1882)[name = string("cast_377")]; + tensor mean_squared_65_cast_fp16 = add(x = var_1882_to_fp16, y = var_1883_to_fp16)[name = string("mean_squared_65_cast_fp16")]; + string mean_squared_65_cast_fp16_to_fp32_dtype_0 = const()[name = 
string("mean_squared_65_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_65_cast_fp16_to_fp32 = cast(dtype = mean_squared_65_cast_fp16_to_fp32_dtype_0, x = mean_squared_65_cast_fp16)[name = string("cast_376")]; + tensor var_1885 = pow(x = mean_squared_65_cast_fp16_to_fp32, y = var_1836)[name = string("op_1885")]; + string clip_102_to_fp16_dtype_0 = const()[name = string("clip_102_to_fp16_dtype_0"), val = string("fp16")]; + string var_1885_to_fp16_dtype_0 = const()[name = string("op_1885_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_102_to_fp16 = cast(dtype = clip_102_to_fp16_dtype_0, x = clip_102)[name = string("cast_374")]; + tensor var_1885_to_fp16 = cast(dtype = var_1885_to_fp16_dtype_0, x = var_1885)[name = string("cast_375")]; + tensor normed_output_129_cast_fp16 = mul(x = clip_102_to_fp16, y = var_1885_to_fp16)[name = string("normed_output_129_cast_fp16")]; + tensor const_58_to_fp16 = const()[name = string("const_58_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44336576)))]; + tensor normed_output_131_cast_fp16 = mul(x = normed_output_129_cast_fp16, y = const_58_to_fp16)[name = string("normed_output_131_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = silu(x = normed_output_131_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + fp16 lconv1ds_3_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_3_linear_end_input_min_to_fp16"), val = fp16(-0x1.e4p+2)]; + fp16 lconv1ds_3_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_3_linear_end_input_max_to_fp16"), val = fp16(0x1.ep+2)]; + tensor clip_103_cast_fp16 = clip(alpha = lconv1ds_3_linear_end_input_min_to_fp16, beta = lconv1ds_3_linear_end_input_max_to_fp16, x = hidden_states_383_cast_fp16)[name = string("clip_103_cast_fp16")]; + tensor lconv1ds_3_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(44338688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44863040))))[name = string("lconv1ds_3_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_3_linear_end_linear_weight_to_fp16_palettized, x = clip_103_cast_fp16)[name = string("linear_42_cast_fp16")]; + fp16 lconv1ds_3_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_3_linear_end_output_min_to_fp16"), val = fp16(-0x1.fcp+2)]; + fp16 lconv1ds_3_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_3_linear_end_output_max_to_fp16"), val = fp16(0x1.f8p+2)]; + tensor clip_104_cast_fp16 = clip(alpha = lconv1ds_3_linear_end_output_min_to_fp16, beta = lconv1ds_3_linear_end_output_max_to_fp16, x = linear_42_cast_fp16)[name = string("clip_104_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = clip_104_cast_fp16, y = hidden_states_367_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + string hidden_states_389_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_389_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1909 = const()[name = string("op_1909"), val = fp32(-0x1p-1)]; + fp32 var_1910 = const()[name = string("op_1910"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1911 = const()[name = string("op_1911"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_389_cast_fp16_to_fp32 = cast(dtype = hidden_states_389_cast_fp16_to_fp32_dtype_0, x = hidden_states_389_cast_fp16)[name = string("cast_373")]; + tensor clip_105 = clip(alpha = var_1911, beta = var_1910, x = hidden_states_389_cast_fp16_to_fp32)[name = string("clip_105")]; + fp32 var_1905_promoted = const()[name = string("op_1905_promoted"), val = fp32(0x1p+1)]; + tensor var_1919 = pow(x = clip_105, y = var_1905_promoted)[name = string("op_1919")]; + tensor var_1921_axes_0 = const()[name = string("op_1921_axes_0"), val = tensor([-1])]; + bool 
var_1921_keep_dims_0 = const()[name = string("op_1921_keep_dims_0"), val = bool(true)]; + tensor var_1921 = reduce_mean(axes = var_1921_axes_0, keep_dims = var_1921_keep_dims_0, x = var_1919)[name = string("op_1921")]; + string var_1921_to_fp16_dtype_0 = const()[name = string("op_1921_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1922_to_fp16 = const()[name = string("op_1922_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1921_to_fp16 = cast(dtype = var_1921_to_fp16_dtype_0, x = var_1921)[name = string("cast_372")]; + tensor mean_squared_67_cast_fp16 = add(x = var_1921_to_fp16, y = var_1922_to_fp16)[name = string("mean_squared_67_cast_fp16")]; + string mean_squared_67_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_67_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_67_cast_fp16_to_fp32 = cast(dtype = mean_squared_67_cast_fp16_to_fp32_dtype_0, x = mean_squared_67_cast_fp16)[name = string("cast_371")]; + tensor var_1924 = pow(x = mean_squared_67_cast_fp16_to_fp32, y = var_1909)[name = string("op_1924")]; + string clip_105_to_fp16_dtype_0 = const()[name = string("clip_105_to_fp16_dtype_0"), val = string("fp16")]; + string var_1924_to_fp16_dtype_0 = const()[name = string("op_1924_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_105_to_fp16 = cast(dtype = clip_105_to_fp16_dtype_0, x = clip_105)[name = string("cast_369")]; + tensor var_1924_to_fp16 = cast(dtype = var_1924_to_fp16_dtype_0, x = var_1924)[name = string("cast_370")]; + tensor normed_output_133_cast_fp16 = mul(x = clip_105_to_fp16, y = var_1924_to_fp16)[name = string("normed_output_133_cast_fp16")]; + tensor const_59_to_fp16 = const()[name = string("const_59_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44864128)))]; + tensor normed_output_135_cast_fp16 = mul(x = normed_output_133_cast_fp16, y = const_59_to_fp16)[name = string("normed_output_135_cast_fp16")]; + fp16 
feed_forward2s_3_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.bap+3)]; + fp16 feed_forward2s_3_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.b6p+3)]; + tensor clip_106_cast_fp16 = clip(alpha = feed_forward2s_3_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_3_ffw_layer_1_input_max_to_fp16, x = normed_output_135_cast_fp16)[name = string("clip_106_cast_fp16")]; + tensor feed_forward2s_3_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44866240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46963456))))[name = string("feed_forward2s_3_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_3_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_106_cast_fp16)[name = string("linear_43_cast_fp16")]; + fp16 feed_forward2s_3_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.d8p+4)]; + fp16 feed_forward2s_3_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.d4p+4)]; + tensor clip_107_cast_fp16 = clip(alpha = feed_forward2s_3_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_3_ffw_layer_1_output_max_to_fp16, x = linear_43_cast_fp16)[name = string("clip_107_cast_fp16")]; + tensor hidden_states_399_cast_fp16 = silu(x = clip_107_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; + fp16 feed_forward2s_3_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.acp+3)]; + fp16 feed_forward2s_3_ffw_layer_2_input_max_to_fp16 = const()[name = 
string("feed_forward2s_3_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.a8p+3)]; + tensor clip_108_cast_fp16 = clip(alpha = feed_forward2s_3_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_3_ffw_layer_2_input_max_to_fp16, x = hidden_states_399_cast_fp16)[name = string("clip_108_cast_fp16")]; + tensor feed_forward2s_3_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46967616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49064832))))[name = string("feed_forward2s_3_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_3_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_108_cast_fp16)[name = string("linear_44_cast_fp16")]; + fp16 feed_forward2s_3_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.74p+6)]; + fp16 feed_forward2s_3_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_3_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.72p+6)]; + tensor clip_109_cast_fp16 = clip(alpha = feed_forward2s_3_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_3_ffw_layer_2_output_max_to_fp16, x = linear_44_cast_fp16)[name = string("clip_109_cast_fp16")]; + string clip_109_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_109_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_109_cast_fp16_to_fp32 = cast(dtype = clip_109_cast_fp16_to_fp32_dtype_0, x = clip_109_cast_fp16)[name = string("cast_368")]; + tensor clip_110 = clip(alpha = var_1911, beta = var_1910, x = clip_109_cast_fp16_to_fp32)[name = string("clip_110")]; + fp32 var_1905_promoted_1 = const()[name = string("op_1905_promoted_1"), val = fp32(0x1p+1)]; + tensor var_1951 = pow(x = clip_110, y = var_1905_promoted_1)[name = string("op_1951")]; + tensor 
var_1953_axes_0 = const()[name = string("op_1953_axes_0"), val = tensor([-1])]; + bool var_1953_keep_dims_0 = const()[name = string("op_1953_keep_dims_0"), val = bool(true)]; + tensor var_1953 = reduce_mean(axes = var_1953_axes_0, keep_dims = var_1953_keep_dims_0, x = var_1951)[name = string("op_1953")]; + string var_1953_to_fp16_dtype_0 = const()[name = string("op_1953_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1954_to_fp16 = const()[name = string("op_1954_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1953_to_fp16 = cast(dtype = var_1953_to_fp16_dtype_0, x = var_1953)[name = string("cast_367")]; + tensor mean_squared_69_cast_fp16 = add(x = var_1953_to_fp16, y = var_1954_to_fp16)[name = string("mean_squared_69_cast_fp16")]; + string mean_squared_69_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_69_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_69_cast_fp16_to_fp32 = cast(dtype = mean_squared_69_cast_fp16_to_fp32_dtype_0, x = mean_squared_69_cast_fp16)[name = string("cast_366")]; + tensor var_1956 = pow(x = mean_squared_69_cast_fp16_to_fp32, y = var_1909)[name = string("op_1956")]; + string clip_110_to_fp16_dtype_0 = const()[name = string("clip_110_to_fp16_dtype_0"), val = string("fp16")]; + string var_1956_to_fp16_dtype_0 = const()[name = string("op_1956_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_110_to_fp16 = cast(dtype = clip_110_to_fp16_dtype_0, x = clip_110)[name = string("cast_364")]; + tensor var_1956_to_fp16 = cast(dtype = var_1956_to_fp16_dtype_0, x = var_1956)[name = string("cast_365")]; + tensor normed_output_137_cast_fp16 = mul(x = clip_110_to_fp16, y = var_1956_to_fp16)[name = string("normed_output_137_cast_fp16")]; + tensor const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49065920)))]; + tensor normed_output_139_cast_fp16 = mul(x = normed_output_137_cast_fp16, y = const_60_to_fp16)[name 
= string("normed_output_139_cast_fp16")]; + fp16 var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_411_cast_fp16 = mul(x = normed_output_139_cast_fp16, y = var_1901_to_fp16)[name = string("hidden_states_411_cast_fp16")]; + tensor hidden_states_413_cast_fp16 = add(x = hidden_states_411_cast_fp16, y = hidden_states_389_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; + fp16 var_1963_to_fp16 = const()[name = string("op_1963_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_1964_to_fp16 = const()[name = string("op_1964_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_111_cast_fp16 = clip(alpha = var_1963_to_fp16, beta = var_1964_to_fp16, x = hidden_states_413_cast_fp16)[name = string("clip_111_cast_fp16")]; + string clip_111_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_111_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1966 = const()[name = string("op_1966"), val = fp32(-0x1p-1)]; + fp32 var_1970_promoted = const()[name = string("op_1970_promoted"), val = fp32(0x1p+1)]; + tensor clip_111_cast_fp16_to_fp32 = cast(dtype = clip_111_cast_fp16_to_fp32_dtype_0, x = clip_111_cast_fp16)[name = string("cast_363")]; + tensor var_1976 = pow(x = clip_111_cast_fp16_to_fp32, y = var_1970_promoted)[name = string("op_1976")]; + tensor var_1978_axes_0 = const()[name = string("op_1978_axes_0"), val = tensor([-1])]; + bool var_1978_keep_dims_0 = const()[name = string("op_1978_keep_dims_0"), val = bool(true)]; + tensor var_1978 = reduce_mean(axes = var_1978_axes_0, keep_dims = var_1978_keep_dims_0, x = var_1976)[name = string("op_1978")]; + string var_1978_to_fp16_dtype_0 = const()[name = string("op_1978_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_1979_to_fp16 = const()[name = string("op_1979_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_1978_to_fp16 = cast(dtype = var_1978_to_fp16_dtype_0, x = var_1978)[name = string("cast_362")]; + tensor mean_squared_71_cast_fp16 = add(x = 
var_1978_to_fp16, y = var_1979_to_fp16)[name = string("mean_squared_71_cast_fp16")]; + string mean_squared_71_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_71_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_71_cast_fp16_to_fp32 = cast(dtype = mean_squared_71_cast_fp16_to_fp32_dtype_0, x = mean_squared_71_cast_fp16)[name = string("cast_361")]; + tensor var_1981 = pow(x = mean_squared_71_cast_fp16_to_fp32, y = var_1966)[name = string("op_1981")]; + string var_1981_to_fp16_dtype_0 = const()[name = string("op_1981_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_1981_to_fp16 = cast(dtype = var_1981_to_fp16_dtype_0, x = var_1981)[name = string("cast_360")]; + tensor normed_output_141_cast_fp16 = mul(x = clip_111_cast_fp16, y = var_1981_to_fp16)[name = string("normed_output_141_cast_fp16")]; + tensor const_61_to_fp16 = const()[name = string("const_61_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49068032)))]; + tensor normed_output_143_cast_fp16 = mul(x = normed_output_141_cast_fp16, y = const_61_to_fp16)[name = string("normed_output_143_cast_fp16")]; + string normed_output_143_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_143_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_1994 = const()[name = string("op_1994"), val = fp32(-0x1p-1)]; + fp32 var_1995 = const()[name = string("op_1995"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_1996 = const()[name = string("op_1996"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_143_cast_fp16_to_fp32 = cast(dtype = normed_output_143_cast_fp16_to_fp32_dtype_0, x = normed_output_143_cast_fp16)[name = string("cast_359")]; + tensor clip_112 = clip(alpha = var_1996, beta = var_1995, x = normed_output_143_cast_fp16_to_fp32)[name = string("clip_112")]; + fp32 var_1990_promoted = const()[name = string("op_1990_promoted"), val = fp32(0x1p+1)]; + tensor var_2004 = pow(x = clip_112, y = 
var_1990_promoted)[name = string("op_2004")]; + tensor var_2006_axes_0 = const()[name = string("op_2006_axes_0"), val = tensor([-1])]; + bool var_2006_keep_dims_0 = const()[name = string("op_2006_keep_dims_0"), val = bool(true)]; + tensor var_2006 = reduce_mean(axes = var_2006_axes_0, keep_dims = var_2006_keep_dims_0, x = var_2004)[name = string("op_2006")]; + string var_2006_to_fp16_dtype_0 = const()[name = string("op_2006_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2006_to_fp16 = cast(dtype = var_2006_to_fp16_dtype_0, x = var_2006)[name = string("cast_358")]; + tensor mean_squared_73_cast_fp16 = add(x = var_2006_to_fp16, y = var_2007_to_fp16)[name = string("mean_squared_73_cast_fp16")]; + string mean_squared_73_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_73_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_73_cast_fp16_to_fp32 = cast(dtype = mean_squared_73_cast_fp16_to_fp32_dtype_0, x = mean_squared_73_cast_fp16)[name = string("cast_357")]; + tensor var_2009 = pow(x = mean_squared_73_cast_fp16_to_fp32, y = var_1994)[name = string("op_2009")]; + string clip_112_to_fp16_dtype_0 = const()[name = string("clip_112_to_fp16_dtype_0"), val = string("fp16")]; + string var_2009_to_fp16_dtype_0 = const()[name = string("op_2009_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_112_to_fp16 = cast(dtype = clip_112_to_fp16_dtype_0, x = clip_112)[name = string("cast_355")]; + tensor var_2009_to_fp16 = cast(dtype = var_2009_to_fp16_dtype_0, x = var_2009)[name = string("cast_356")]; + tensor normed_output_145_cast_fp16 = mul(x = clip_112_to_fp16, y = var_2009_to_fp16)[name = string("normed_output_145_cast_fp16")]; + tensor const_62_to_fp16 = const()[name = string("const_62_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49070144)))]; + tensor normed_output_147_cast_fp16 = mul(x = 
normed_output_145_cast_fp16, y = const_62_to_fp16)[name = string("normed_output_147_cast_fp16")]; + fp16 feed_forward1s_4_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.8ap+3)]; + fp16 feed_forward1s_4_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.86p+3)]; + tensor clip_113_cast_fp16 = clip(alpha = feed_forward1s_4_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_4_ffw_layer_1_input_max_to_fp16, x = normed_output_147_cast_fp16)[name = string("clip_113_cast_fp16")]; + tensor feed_forward1s_4_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49072256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51169472))))[name = string("feed_forward1s_4_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_4_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_113_cast_fp16)[name = string("linear_45_cast_fp16")]; + fp16 feed_forward1s_4_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.ap+4)]; + fp16 feed_forward1s_4_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.9cp+4)]; + tensor clip_114_cast_fp16 = clip(alpha = feed_forward1s_4_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_4_ffw_layer_1_output_max_to_fp16, x = linear_45_cast_fp16)[name = string("clip_114_cast_fp16")]; + tensor hidden_states_429_cast_fp16 = silu(x = clip_114_cast_fp16)[name = string("hidden_states_429_cast_fp16")]; + fp16 feed_forward1s_4_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.32p+3)]; + 
fp16 feed_forward1s_4_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.2ep+3)]; + tensor clip_115_cast_fp16 = clip(alpha = feed_forward1s_4_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_4_ffw_layer_2_input_max_to_fp16, x = hidden_states_429_cast_fp16)[name = string("clip_115_cast_fp16")]; + tensor feed_forward1s_4_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51173632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53270848))))[name = string("feed_forward1s_4_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_4_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_115_cast_fp16)[name = string("linear_46_cast_fp16")]; + fp16 feed_forward1s_4_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.7cp+5)]; + fp16 feed_forward1s_4_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_4_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.78p+5)]; + tensor clip_116_cast_fp16 = clip(alpha = feed_forward1s_4_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_4_ffw_layer_2_output_max_to_fp16, x = linear_46_cast_fp16)[name = string("clip_116_cast_fp16")]; + string clip_116_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_116_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_116_cast_fp16_to_fp32 = cast(dtype = clip_116_cast_fp16_to_fp32_dtype_0, x = clip_116_cast_fp16)[name = string("cast_354")]; + tensor clip_117 = clip(alpha = var_1996, beta = var_1995, x = clip_116_cast_fp16_to_fp32)[name = string("clip_117")]; + fp32 var_1990_promoted_1 = const()[name = string("op_1990_promoted_1"), val = fp32(0x1p+1)]; + tensor var_2036 = pow(x = clip_117, y 
= var_1990_promoted_1)[name = string("op_2036")]; + tensor var_2038_axes_0 = const()[name = string("op_2038_axes_0"), val = tensor([-1])]; + bool var_2038_keep_dims_0 = const()[name = string("op_2038_keep_dims_0"), val = bool(true)]; + tensor var_2038 = reduce_mean(axes = var_2038_axes_0, keep_dims = var_2038_keep_dims_0, x = var_2036)[name = string("op_2038")]; + string var_2038_to_fp16_dtype_0 = const()[name = string("op_2038_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2039_to_fp16 = const()[name = string("op_2039_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2038_to_fp16 = cast(dtype = var_2038_to_fp16_dtype_0, x = var_2038)[name = string("cast_353")]; + tensor mean_squared_75_cast_fp16 = add(x = var_2038_to_fp16, y = var_2039_to_fp16)[name = string("mean_squared_75_cast_fp16")]; + string mean_squared_75_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_75_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_75_cast_fp16_to_fp32 = cast(dtype = mean_squared_75_cast_fp16_to_fp32_dtype_0, x = mean_squared_75_cast_fp16)[name = string("cast_352")]; + tensor var_2041 = pow(x = mean_squared_75_cast_fp16_to_fp32, y = var_1994)[name = string("op_2041")]; + string clip_117_to_fp16_dtype_0 = const()[name = string("clip_117_to_fp16_dtype_0"), val = string("fp16")]; + string var_2041_to_fp16_dtype_0 = const()[name = string("op_2041_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_117_to_fp16 = cast(dtype = clip_117_to_fp16_dtype_0, x = clip_117)[name = string("cast_350")]; + tensor var_2041_to_fp16 = cast(dtype = var_2041_to_fp16_dtype_0, x = var_2041)[name = string("cast_351")]; + tensor normed_output_149_cast_fp16 = mul(x = clip_117_to_fp16, y = var_2041_to_fp16)[name = string("normed_output_149_cast_fp16")]; + tensor const_63_to_fp16 = const()[name = string("const_63_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53271936)))]; + tensor normed_output_151_cast_fp16 = 
mul(x = normed_output_149_cast_fp16, y = const_63_to_fp16)[name = string("normed_output_151_cast_fp16")]; + fp16 var_1986_to_fp16 = const()[name = string("op_1986_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_441_cast_fp16 = mul(x = normed_output_151_cast_fp16, y = var_1986_to_fp16)[name = string("hidden_states_441_cast_fp16")]; + tensor hidden_states_443_cast_fp16 = add(x = hidden_states_441_cast_fp16, y = normed_output_143_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; + fp16 var_2048_to_fp16 = const()[name = string("op_2048_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_118_cast_fp16 = clip(alpha = var_2048_to_fp16, beta = var_2049_to_fp16, x = hidden_states_443_cast_fp16)[name = string("clip_118_cast_fp16")]; + string clip_118_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_118_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2051 = const()[name = string("op_2051"), val = fp32(-0x1p-1)]; + fp32 var_2055_promoted = const()[name = string("op_2055_promoted"), val = fp32(0x1p+1)]; + tensor clip_118_cast_fp16_to_fp32 = cast(dtype = clip_118_cast_fp16_to_fp32_dtype_0, x = clip_118_cast_fp16)[name = string("cast_349")]; + tensor var_2061 = pow(x = clip_118_cast_fp16_to_fp32, y = var_2055_promoted)[name = string("op_2061")]; + tensor var_2063_axes_0 = const()[name = string("op_2063_axes_0"), val = tensor([-1])]; + bool var_2063_keep_dims_0 = const()[name = string("op_2063_keep_dims_0"), val = bool(true)]; + tensor var_2063 = reduce_mean(axes = var_2063_axes_0, keep_dims = var_2063_keep_dims_0, x = var_2061)[name = string("op_2063")]; + string var_2063_to_fp16_dtype_0 = const()[name = string("op_2063_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2064_to_fp16 = const()[name = string("op_2064_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2063_to_fp16 = cast(dtype = var_2063_to_fp16_dtype_0, x = var_2063)[name = 
string("cast_348")]; + tensor mean_squared_77_cast_fp16 = add(x = var_2063_to_fp16, y = var_2064_to_fp16)[name = string("mean_squared_77_cast_fp16")]; + string mean_squared_77_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_77_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_77_cast_fp16_to_fp32 = cast(dtype = mean_squared_77_cast_fp16_to_fp32_dtype_0, x = mean_squared_77_cast_fp16)[name = string("cast_347")]; + tensor var_2066 = pow(x = mean_squared_77_cast_fp16_to_fp32, y = var_2051)[name = string("op_2066")]; + string var_2066_to_fp16_dtype_0 = const()[name = string("op_2066_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2066_to_fp16 = cast(dtype = var_2066_to_fp16_dtype_0, x = var_2066)[name = string("cast_346")]; + tensor normed_output_153_cast_fp16 = mul(x = clip_118_cast_fp16, y = var_2066_to_fp16)[name = string("normed_output_153_cast_fp16")]; + tensor const_64_to_fp16 = const()[name = string("const_64_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53274048)))]; + tensor normed_output_155_cast_fp16 = mul(x = normed_output_153_cast_fp16, y = const_64_to_fp16)[name = string("normed_output_155_cast_fp16")]; + int32 var_2072 = const()[name = string("op_2072"), val = int32(-1)]; + fp32 var_2073 = const()[name = string("op_2073"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_4_q_proj_input_min_to_fp16 = const()[name = string("self_attns_4_q_proj_input_min_to_fp16"), val = fp16(-0x1.54p+3)]; + fp16 self_attns_4_q_proj_input_max_to_fp16 = const()[name = string("self_attns_4_q_proj_input_max_to_fp16"), val = fp16(0x1.52p+3)]; + tensor clip_119_cast_fp16 = clip(alpha = self_attns_4_q_proj_input_min_to_fp16, beta = self_attns_4_q_proj_input_max_to_fp16, x = normed_output_155_cast_fp16)[name = string("clip_119_cast_fp16")]; + tensor self_attns_4_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(53276160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53800512))))[name = string("self_attns_4_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_47_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_4_q_proj_linear_weight_to_fp16_palettized, x = clip_119_cast_fp16)[name = string("linear_47_cast_fp16")]; + fp16 self_attns_4_q_proj_output_min_to_fp16 = const()[name = string("self_attns_4_q_proj_output_min_to_fp16"), val = fp16(-0x1.3p+4)]; + fp16 self_attns_4_q_proj_output_max_to_fp16 = const()[name = string("self_attns_4_q_proj_output_max_to_fp16"), val = fp16(0x1.2ep+4)]; + tensor clip_120_cast_fp16 = clip(alpha = self_attns_4_q_proj_output_min_to_fp16, beta = self_attns_4_q_proj_output_max_to_fp16, x = linear_47_cast_fp16)[name = string("clip_120_cast_fp16")]; + tensor var_2117 = const()[name = string("op_2117"), val = tensor([1, 50, 8, 128])]; + tensor q_9_cast_fp16 = reshape(shape = var_2117, x = clip_120_cast_fp16)[name = string("q_9_cast_fp16")]; + tensor self_attns_4_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53801600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54325952))))[name = string("self_attns_4_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_4_k_proj_linear_weight_to_fp16_palettized, x = clip_119_cast_fp16)[name = string("linear_48_cast_fp16")]; + fp16 self_attns_4_k_proj_output_min_to_fp16 = const()[name = string("self_attns_4_k_proj_output_min_to_fp16"), val = fp16(-0x1.3p+4)]; + fp16 self_attns_4_k_proj_output_max_to_fp16 = const()[name = string("self_attns_4_k_proj_output_max_to_fp16"), val = fp16(0x1.2ep+4)]; + tensor clip_122_cast_fp16 = clip(alpha = 
self_attns_4_k_proj_output_min_to_fp16, beta = self_attns_4_k_proj_output_max_to_fp16, x = linear_48_cast_fp16)[name = string("clip_122_cast_fp16")]; + tensor var_2129 = const()[name = string("op_2129"), val = tensor([1, 50, 8, 128])]; + tensor k_9_cast_fp16 = reshape(shape = var_2129, x = clip_122_cast_fp16)[name = string("k_9_cast_fp16")]; + tensor self_attns_4_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54327040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54851392))))[name = string("self_attns_4_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_4_v_proj_linear_weight_to_fp16_palettized, x = clip_119_cast_fp16)[name = string("linear_49_cast_fp16")]; + fp16 self_attns_4_v_proj_output_min_to_fp16 = const()[name = string("self_attns_4_v_proj_output_min_to_fp16"), val = fp16(-0x1.3p+4)]; + fp16 self_attns_4_v_proj_output_max_to_fp16 = const()[name = string("self_attns_4_v_proj_output_max_to_fp16"), val = fp16(0x1.2ep+4)]; + tensor clip_124_cast_fp16 = clip(alpha = self_attns_4_v_proj_output_min_to_fp16, beta = self_attns_4_v_proj_output_max_to_fp16, x = linear_49_cast_fp16)[name = string("clip_124_cast_fp16")]; + tensor var_2141 = const()[name = string("op_2141"), val = tensor([1, 50, 8, 128])]; + tensor input_201_cast_fp16 = reshape(shape = var_2141, x = clip_124_cast_fp16)[name = string("input_201_cast_fp16")]; + fp16 var_2143_to_fp16 = const()[name = string("op_2143_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_2144_cast_fp16 = mul(x = q_9_cast_fp16, y = var_2143_to_fp16)[name = string("op_2144_cast_fp16")]; + tensor var_2145_to_fp16 = const()[name = string("op_2145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54852480)))]; + tensor input_197_cast_fp16 = mul(x = 
var_2144_cast_fp16, y = var_2145_to_fp16)[name = string("input_197_cast_fp16")]; + fp16 var_2147_to_fp16 = const()[name = string("op_2147_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_199_cast_fp16 = mul(x = k_9_cast_fp16, y = var_2147_to_fp16)[name = string("input_199_cast_fp16")]; + tensor q_padded_9_pad_0 = const()[name = string("q_padded_9_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_9_mode_0 = const()[name = string("q_padded_9_mode_0"), val = string("constant")]; + fp16 const_65_to_fp16 = const()[name = string("const_65_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_9_cast_fp16 = pad(constant_val = const_65_to_fp16, mode = q_padded_9_mode_0, pad = q_padded_9_pad_0, x = input_197_cast_fp16)[name = string("q_padded_9_cast_fp16")]; + tensor var_2151 = const()[name = string("op_2151"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_9_cast_fp16 = reshape(shape = var_2151, x = q_padded_9_cast_fp16)[name = string("q_blocks_9_cast_fp16")]; + tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; + fp16 const_66_to_fp16 = const()[name = string("const_66_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_9_cast_fp16 = pad(constant_val = const_66_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_199_cast_fp16)[name = string("k_padded_9_cast_fp16")]; + tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; + fp16 const_67_to_fp16 = const()[name = string("const_67_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_9_cast_fp16 = pad(constant_val = const_67_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = input_201_cast_fp16)[name = string("v_padded_9_cast_fp16")]; + tensor var_2158_begin_0 = 
const()[name = string("op_2158_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2158_end_0 = const()[name = string("op_2158_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_2158_end_mask_0 = const()[name = string("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = k_padded_9_cast_fp16)[name = string("op_2158_cast_fp16")]; + tensor var_2160_begin_0 = const()[name = string("op_2160_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_2160_end_0 = const()[name = string("op_2160_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_2160_end_mask_0 = const()[name = string("op_2160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2160_cast_fp16 = slice_by_index(begin = var_2160_begin_0, end = var_2160_end_0, end_mask = var_2160_end_mask_0, x = k_padded_9_cast_fp16)[name = string("op_2160_cast_fp16")]; + tensor var_2162_begin_0 = const()[name = string("op_2162_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_2162_end_0 = const()[name = string("op_2162_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_2162_end_mask_0 = const()[name = string("op_2162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2162_cast_fp16 = slice_by_index(begin = var_2162_begin_0, end = var_2162_end_0, end_mask = var_2162_end_mask_0, x = k_padded_9_cast_fp16)[name = string("op_2162_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = string("op_2164_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_2164_end_0 = const()[name = string("op_2164_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_2164_end_mask_0 = const()[name = string("op_2164_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_padded_9_cast_fp16)[name = string("op_2164_cast_fp16")]; + tensor 
var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_2166_end_0 = const()[name = string("op_2166_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = var_2166_end_0, end_mask = var_2166_end_mask_0, x = k_padded_9_cast_fp16)[name = string("op_2166_cast_fp16")]; + int32 k_blocks_9_axis_0 = const()[name = string("k_blocks_9_axis_0"), val = int32(1)]; + tensor k_blocks_9_cast_fp16 = stack(axis = k_blocks_9_axis_0, values = (var_2158_cast_fp16, var_2160_cast_fp16, var_2162_cast_fp16, var_2164_cast_fp16, var_2166_cast_fp16))[name = string("k_blocks_9_cast_fp16")]; + tensor var_2170_begin_0 = const()[name = string("op_2170_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2170_end_0 = const()[name = string("op_2170_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_2170_end_mask_0 = const()[name = string("op_2170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2170_cast_fp16 = slice_by_index(begin = var_2170_begin_0, end = var_2170_end_0, end_mask = var_2170_end_mask_0, x = v_padded_9_cast_fp16)[name = string("op_2170_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = string("op_2172_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_2172_end_0 = const()[name = string("op_2172_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_2172_end_mask_0 = const()[name = string("op_2172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = v_padded_9_cast_fp16)[name = string("op_2172_cast_fp16")]; + tensor var_2174_begin_0 = const()[name = string("op_2174_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_2174_end_0 = const()[name = string("op_2174_end_0"), val = tensor([1, 48, 8, 
128])]; + tensor var_2174_end_mask_0 = const()[name = string("op_2174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2174_cast_fp16 = slice_by_index(begin = var_2174_begin_0, end = var_2174_end_0, end_mask = var_2174_end_mask_0, x = v_padded_9_cast_fp16)[name = string("op_2174_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = string("op_2176_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_2176_end_0 = const()[name = string("op_2176_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_2176_end_mask_0 = const()[name = string("op_2176_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = v_padded_9_cast_fp16)[name = string("op_2176_cast_fp16")]; + tensor var_2178_begin_0 = const()[name = string("op_2178_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_2178_end_0 = const()[name = string("op_2178_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_2178_end_mask_0 = const()[name = string("op_2178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2178_cast_fp16 = slice_by_index(begin = var_2178_begin_0, end = var_2178_end_0, end_mask = var_2178_end_mask_0, x = v_padded_9_cast_fp16)[name = string("op_2178_cast_fp16")]; + int32 v_blocks_9_axis_0 = const()[name = string("v_blocks_9_axis_0"), val = int32(1)]; + tensor v_blocks_9_cast_fp16 = stack(axis = v_blocks_9_axis_0, values = (var_2170_cast_fp16, var_2172_cast_fp16, var_2174_cast_fp16, var_2176_cast_fp16, var_2178_cast_fp16))[name = string("v_blocks_9_cast_fp16")]; + tensor var_2186 = const()[name = string("op_2186"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_2188 = const()[name = string("op_2188"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_9_transpose_x_0 = const()[name = string("matrix_ac_9_transpose_x_0"), val = bool(false)]; + bool matrix_ac_9_transpose_y_0 = const()[name = string("matrix_ac_9_transpose_y_0"), val 
= bool(false)]; + tensor queries_9_cast_fp16 = transpose(perm = var_2186, x = q_blocks_9_cast_fp16)[name = string("transpose_46")]; + tensor keys_t_9_cast_fp16 = transpose(perm = var_2188, x = k_blocks_9_cast_fp16)[name = string("transpose_47")]; + tensor matrix_ac_9_cast_fp16 = matmul(transpose_x = matrix_ac_9_transpose_x_0, transpose_y = matrix_ac_9_transpose_y_0, x = queries_9_cast_fp16, y = keys_t_9_cast_fp16)[name = string("matrix_ac_9_cast_fp16")]; + tensor var_2191 = const()[name = string("op_2191"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_9_cast_fp16 = reshape(shape = var_2191, x = queries_9_cast_fp16)[name = string("q_flat_9_cast_fp16")]; + bool matrix_bd_41_transpose_x_0 = const()[name = string("matrix_bd_41_transpose_x_0"), val = bool(false)]; + bool matrix_bd_41_transpose_y_0 = const()[name = string("matrix_bd_41_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_9_to_fp16 = const()[name = string("rel_k_t_9_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54852800)))]; + tensor matrix_bd_41_cast_fp16 = matmul(transpose_x = matrix_bd_41_transpose_x_0, transpose_y = matrix_bd_41_transpose_y_0, x = q_flat_9_cast_fp16, y = rel_k_t_9_to_fp16)[name = string("matrix_bd_41_cast_fp16")]; + tensor var_2196 = const()[name = string("op_2196"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_203_cast_fp16 = reshape(shape = var_2196, x = matrix_bd_41_cast_fp16)[name = string("input_203_cast_fp16")]; + tensor matrix_bd_43_pad_0 = const()[name = string("matrix_bd_43_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54879488)))]; + string matrix_bd_43_mode_0 = const()[name = string("matrix_bd_43_mode_0"), val = string("constant")]; + fp16 const_69_to_fp16 = const()[name = string("const_69_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_43_cast_fp16 = pad(constant_val = const_69_to_fp16, mode = matrix_bd_43_mode_0, pad = matrix_bd_43_pad_0, x = 
input_203_cast_fp16)[name = string("matrix_bd_43_cast_fp16")]; + tensor var_2200 = const()[name = string("op_2200"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_45_cast_fp16 = reshape(shape = var_2200, x = matrix_bd_43_cast_fp16)[name = string("matrix_bd_45_cast_fp16")]; + tensor matrix_bd_47_begin_0 = const()[name = string("matrix_bd_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_47_end_0 = const()[name = string("matrix_bd_47_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_47_end_mask_0 = const()[name = string("matrix_bd_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_47_cast_fp16 = slice_by_index(begin = matrix_bd_47_begin_0, end = matrix_bd_47_end_0, end_mask = matrix_bd_47_end_mask_0, x = matrix_bd_45_cast_fp16)[name = string("matrix_bd_47_cast_fp16")]; + tensor var_2206 = const()[name = string("op_2206"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_49_cast_fp16 = reshape(shape = var_2206, x = matrix_bd_47_cast_fp16)[name = string("matrix_bd_49_cast_fp16")]; + tensor attn_25_cast_fp16 = add(x = matrix_ac_9_cast_fp16, y = matrix_bd_49_cast_fp16)[name = string("attn_25_cast_fp16")]; + fp16 _inversed_2209_y_0_to_fp16 = const()[name = string("_inversed_2209_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_2209_cast_fp16 = mul(x = attn_25_cast_fp16, y = _inversed_2209_y_0_to_fp16)[name = string("_inversed_2209_cast_fp16")]; + string _inversed_2209_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_2209_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_2209_cast_fp16_to_fp32 = cast(dtype = _inversed_2209_cast_fp16_to_fp32_dtype_0, x = _inversed_2209_cast_fp16)[name = string("cast_345")]; + tensor var_2210 = tanh(x = _inversed_2209_cast_fp16_to_fp32)[name = string("op_2210")]; + string var_2210_to_fp16_dtype_0 = const()[name = string("op_2210_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_4_softcap_to_fp16 = const()[name = 
string("self_attns_4_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_2210_to_fp16 = cast(dtype = var_2210_to_fp16_dtype_0, x = var_2210)[name = string("cast_344")]; + tensor attn_27_cast_fp16 = mul(x = var_2210_to_fp16, y = self_attns_4_softcap_to_fp16)[name = string("attn_27_cast_fp16")]; + string attn_27_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_27_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_27_cast_fp16_to_fp32 = cast(dtype = attn_27_cast_fp16_to_fp32_dtype_0, x = attn_27_cast_fp16)[name = string("cast_343")]; + tensor input_205 = select(a = var_2073, b = attn_27_cast_fp16_to_fp32, cond = var_460)[name = string("input_205")]; + tensor var_2214 = softmax(axis = var_2072, x = input_205)[name = string("op_2214")]; + tensor var_2216 = const()[name = string("op_2216"), val = tensor([0, 3, 1, -3, -1])]; + bool out_25_transpose_x_0 = const()[name = string("out_25_transpose_x_0"), val = bool(false)]; + bool out_25_transpose_y_0 = const()[name = string("out_25_transpose_y_0"), val = bool(false)]; + string var_2214_to_fp16_dtype_0 = const()[name = string("op_2214_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_9_cast_fp16 = transpose(perm = var_2216, x = v_blocks_9_cast_fp16)[name = string("transpose_45")]; + tensor var_2214_to_fp16 = cast(dtype = var_2214_to_fp16_dtype_0, x = var_2214)[name = string("cast_342")]; + tensor out_25_cast_fp16 = matmul(transpose_x = out_25_transpose_x_0, transpose_y = out_25_transpose_y_0, x = var_2214_to_fp16, y = values_t_9_cast_fp16)[name = string("out_25_cast_fp16")]; + tensor var_2219 = const()[name = string("op_2219"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_2221 = const()[name = string("op_2221"), val = tensor([1, 60, 1024])]; + tensor var_2220_cast_fp16 = transpose(perm = var_2219, x = out_25_cast_fp16)[name = string("transpose_44")]; + tensor out_27_cast_fp16 = reshape(shape = var_2221, x = var_2220_cast_fp16)[name = string("out_27_cast_fp16")]; + tensor var_2224_begin_0 = 
const()[name = string("op_2224_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2224_end_0 = const()[name = string("op_2224_end_0"), val = tensor([1, 50, 1024])]; + tensor var_2224_end_mask_0 = const()[name = string("op_2224_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = out_27_cast_fp16)[name = string("op_2224_cast_fp16")]; + fp16 self_attns_4_post_input_min_to_fp16 = const()[name = string("self_attns_4_post_input_min_to_fp16"), val = fp16(-0x1.28p+4)]; + fp16 self_attns_4_post_input_max_to_fp16 = const()[name = string("self_attns_4_post_input_max_to_fp16"), val = fp16(0x1.26p+4)]; + tensor clip_125_cast_fp16 = clip(alpha = self_attns_4_post_input_min_to_fp16, beta = self_attns_4_post_input_max_to_fp16, x = var_2224_cast_fp16)[name = string("clip_125_cast_fp16")]; + tensor self_attns_4_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54879616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55403968))))[name = string("self_attns_4_post_linear_weight_to_fp16_palettized")]; + tensor linear_51_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_4_post_linear_weight_to_fp16_palettized, x = clip_125_cast_fp16)[name = string("linear_51_cast_fp16")]; + fp16 self_attns_4_post_output_min_to_fp16 = const()[name = string("self_attns_4_post_output_min_to_fp16"), val = fp16(-0x1.acp+5)]; + fp16 self_attns_4_post_output_max_to_fp16 = const()[name = string("self_attns_4_post_output_max_to_fp16"), val = fp16(0x1.a8p+5)]; + tensor clip_126_cast_fp16 = clip(alpha = self_attns_4_post_output_min_to_fp16, beta = self_attns_4_post_output_max_to_fp16, x = linear_51_cast_fp16)[name = string("clip_126_cast_fp16")]; + fp16 var_2236_to_fp16 = const()[name = string("op_2236_to_fp16"), val = 
fp16(-0x1.ffcp+15)]; + fp16 var_2237_to_fp16 = const()[name = string("op_2237_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_127_cast_fp16 = clip(alpha = var_2236_to_fp16, beta = var_2237_to_fp16, x = clip_126_cast_fp16)[name = string("clip_127_cast_fp16")]; + string clip_127_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_127_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2239 = const()[name = string("op_2239"), val = fp32(-0x1p-1)]; + fp32 var_2243_promoted = const()[name = string("op_2243_promoted"), val = fp32(0x1p+1)]; + tensor clip_127_cast_fp16_to_fp32 = cast(dtype = clip_127_cast_fp16_to_fp32_dtype_0, x = clip_127_cast_fp16)[name = string("cast_341")]; + tensor var_2249 = pow(x = clip_127_cast_fp16_to_fp32, y = var_2243_promoted)[name = string("op_2249")]; + tensor var_2251_axes_0 = const()[name = string("op_2251_axes_0"), val = tensor([-1])]; + bool var_2251_keep_dims_0 = const()[name = string("op_2251_keep_dims_0"), val = bool(true)]; + tensor var_2251 = reduce_mean(axes = var_2251_axes_0, keep_dims = var_2251_keep_dims_0, x = var_2249)[name = string("op_2251")]; + string var_2251_to_fp16_dtype_0 = const()[name = string("op_2251_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2252_to_fp16 = const()[name = string("op_2252_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2251_to_fp16 = cast(dtype = var_2251_to_fp16_dtype_0, x = var_2251)[name = string("cast_340")]; + tensor mean_squared_79_cast_fp16 = add(x = var_2251_to_fp16, y = var_2252_to_fp16)[name = string("mean_squared_79_cast_fp16")]; + string mean_squared_79_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_79_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_79_cast_fp16_to_fp32 = cast(dtype = mean_squared_79_cast_fp16_to_fp32_dtype_0, x = mean_squared_79_cast_fp16)[name = string("cast_339")]; + tensor var_2254 = pow(x = mean_squared_79_cast_fp16_to_fp32, y = var_2239)[name = string("op_2254")]; + string var_2254_to_fp16_dtype_0 
= const()[name = string("op_2254_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2254_to_fp16 = cast(dtype = var_2254_to_fp16_dtype_0, x = var_2254)[name = string("cast_338")]; + tensor normed_output_157_cast_fp16 = mul(x = clip_127_cast_fp16, y = var_2254_to_fp16)[name = string("normed_output_157_cast_fp16")]; + tensor const_70_to_fp16 = const()[name = string("const_70_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55405056)))]; + tensor normed_output_159_cast_fp16 = mul(x = normed_output_157_cast_fp16, y = const_70_to_fp16)[name = string("normed_output_159_cast_fp16")]; + tensor hidden_states_469_cast_fp16 = add(x = normed_output_159_cast_fp16, y = hidden_states_443_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; + string hidden_states_469_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_469_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2261 = const()[name = string("op_2261"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_2262 = const()[name = string("op_2262"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_2274 = const()[name = string("op_2274"), val = fp32(-0x1p-1)]; + fp32 var_2270_promoted = const()[name = string("op_2270_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_469_cast_fp16_to_fp32 = cast(dtype = hidden_states_469_cast_fp16_to_fp32_dtype_0, x = hidden_states_469_cast_fp16)[name = string("cast_337")]; + tensor var_2282 = pow(x = hidden_states_469_cast_fp16_to_fp32, y = var_2270_promoted)[name = string("op_2282")]; + tensor var_2284_axes_0 = const()[name = string("op_2284_axes_0"), val = tensor([-1])]; + bool var_2284_keep_dims_0 = const()[name = string("op_2284_keep_dims_0"), val = bool(true)]; + tensor var_2284 = reduce_mean(axes = var_2284_axes_0, keep_dims = var_2284_keep_dims_0, x = var_2282)[name = string("op_2284")]; + string var_2284_to_fp16_dtype_0 = const()[name = string("op_2284_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2285_to_fp16 = 
const()[name = string("op_2285_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2284_to_fp16 = cast(dtype = var_2284_to_fp16_dtype_0, x = var_2284)[name = string("cast_336")]; + tensor mean_squared_81_cast_fp16 = add(x = var_2284_to_fp16, y = var_2285_to_fp16)[name = string("mean_squared_81_cast_fp16")]; + string mean_squared_81_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_81_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_81_cast_fp16_to_fp32 = cast(dtype = mean_squared_81_cast_fp16_to_fp32_dtype_0, x = mean_squared_81_cast_fp16)[name = string("cast_335")]; + tensor var_2287 = pow(x = mean_squared_81_cast_fp16_to_fp32, y = var_2274)[name = string("op_2287")]; + string var_2287_to_fp16_dtype_0 = const()[name = string("op_2287_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2287_to_fp16 = cast(dtype = var_2287_to_fp16_dtype_0, x = var_2287)[name = string("cast_334")]; + tensor normed_output_161_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_2287_to_fp16)[name = string("normed_output_161_cast_fp16")]; + tensor const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55407168)))]; + tensor normed_output_163_cast_fp16 = mul(x = normed_output_161_cast_fp16, y = const_71_to_fp16)[name = string("normed_output_163_cast_fp16")]; + fp16 lconv1ds_4_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_4_linear_start_input_min_to_fp16"), val = fp16(-0x1.4ep+3)]; + fp16 lconv1ds_4_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_4_linear_start_input_max_to_fp16"), val = fp16(0x1.4ap+3)]; + tensor clip_128_cast_fp16 = clip(alpha = lconv1ds_4_linear_start_input_min_to_fp16, beta = lconv1ds_4_linear_start_input_max_to_fp16, x = normed_output_163_cast_fp16)[name = string("clip_128_cast_fp16")]; + tensor lconv1ds_4_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55409280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56457920))))[name = string("lconv1ds_4_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_52_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_4_linear_start_linear_weight_to_fp16_palettized, x = clip_128_cast_fp16)[name = string("linear_52_cast_fp16")]; + fp16 lconv1ds_4_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_4_linear_start_output_min_to_fp16"), val = fp16(-0x1.7cp+4)]; + fp16 lconv1ds_4_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_4_linear_start_output_max_to_fp16"), val = fp16(0x1.7ap+4)]; + tensor clip_129_cast_fp16 = clip(alpha = lconv1ds_4_linear_start_output_min_to_fp16, beta = lconv1ds_4_linear_start_output_max_to_fp16, x = linear_52_cast_fp16)[name = string("clip_129_cast_fp16")]; + int32 hidden_states_477_split_num_splits_0 = const()[name = string("hidden_states_477_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_477_split_axis_0 = const()[name = string("hidden_states_477_split_axis_0"), val = int32(-1)]; + tensor hidden_states_477_split_cast_fp16_0, tensor hidden_states_477_split_cast_fp16_1 = split(axis = hidden_states_477_split_axis_0, num_splits = hidden_states_477_split_num_splits_0, x = clip_129_cast_fp16)[name = string("hidden_states_477_split_cast_fp16")]; + tensor hidden_states_477_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_477_split_cast_fp16_1)[name = string("hidden_states_477_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_477_cast_fp16 = mul(x = hidden_states_477_split_cast_fp16_0, y = hidden_states_477_split_1_sigmoid_cast_fp16)[name = string("hidden_states_477_cast_fp16")]; + tensor input_213_perm_0 = const()[name = string("input_213_perm_0"), val = tensor([0, 2, 1])]; + tensor input_215_pad_0 = const()[name = string("input_215_pad_0"), val = tensor([0, 
0, 0, 0, 4, 0])]; + string input_215_mode_0 = const()[name = string("input_215_mode_0"), val = string("constant")]; + fp16 const_72_to_fp16 = const()[name = string("const_72_to_fp16"), val = fp16(0x0p+0)]; + tensor input_213_cast_fp16 = transpose(perm = input_213_perm_0, x = hidden_states_477_cast_fp16)[name = string("transpose_43")]; + tensor input_215_cast_fp16 = pad(constant_val = const_72_to_fp16, mode = input_215_mode_0, pad = input_215_pad_0, x = input_213_cast_fp16)[name = string("input_215_cast_fp16")]; + string var_2313_pad_type_0 = const()[name = string("op_2313_pad_type_0"), val = string("valid")]; + int32 var_2313_groups_0 = const()[name = string("op_2313_groups_0"), val = int32(1024)]; + tensor var_2313_strides_0 = const()[name = string("op_2313_strides_0"), val = tensor([1])]; + tensor var_2313_pad_0 = const()[name = string("op_2313_pad_0"), val = tensor([0, 0])]; + tensor var_2313_dilations_0 = const()[name = string("op_2313_dilations_0"), val = tensor([1])]; + tensor lconv1ds_4_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56460032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56462656))))[name = string("lconv1ds_4_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_2313_cast_fp16 = conv(dilations = var_2313_dilations_0, groups = var_2313_groups_0, pad = var_2313_pad_0, pad_type = var_2313_pad_type_0, strides = var_2313_strides_0, weight = lconv1ds_4_depthwise_conv1d_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("op_2313_cast_fp16")]; + tensor hidden_states_479_perm_0 = const()[name = string("hidden_states_479_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_479_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_479_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_479_cast_fp16 = transpose(perm = 
hidden_states_479_perm_0, x = var_2313_cast_fp16)[name = string("transpose_42")]; + tensor hidden_states_479_cast_fp16_to_fp32 = cast(dtype = hidden_states_479_cast_fp16_to_fp32_dtype_0, x = hidden_states_479_cast_fp16)[name = string("cast_333")]; + tensor clip_130 = clip(alpha = var_2262, beta = var_2261, x = hidden_states_479_cast_fp16_to_fp32)[name = string("clip_130")]; + fp32 var_2270_promoted_1 = const()[name = string("op_2270_promoted_1"), val = fp32(0x1p+1)]; + tensor var_2318 = pow(x = clip_130, y = var_2270_promoted_1)[name = string("op_2318")]; + tensor var_2320_axes_0 = const()[name = string("op_2320_axes_0"), val = tensor([-1])]; + bool var_2320_keep_dims_0 = const()[name = string("op_2320_keep_dims_0"), val = bool(true)]; + tensor var_2320 = reduce_mean(axes = var_2320_axes_0, keep_dims = var_2320_keep_dims_0, x = var_2318)[name = string("op_2320")]; + string var_2320_to_fp16_dtype_0 = const()[name = string("op_2320_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2321_to_fp16 = const()[name = string("op_2321_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2320_to_fp16 = cast(dtype = var_2320_to_fp16_dtype_0, x = var_2320)[name = string("cast_332")]; + tensor mean_squared_83_cast_fp16 = add(x = var_2320_to_fp16, y = var_2321_to_fp16)[name = string("mean_squared_83_cast_fp16")]; + string mean_squared_83_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_83_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_83_cast_fp16_to_fp32 = cast(dtype = mean_squared_83_cast_fp16_to_fp32_dtype_0, x = mean_squared_83_cast_fp16)[name = string("cast_331")]; + tensor var_2323 = pow(x = mean_squared_83_cast_fp16_to_fp32, y = var_2274)[name = string("op_2323")]; + string clip_130_to_fp16_dtype_0 = const()[name = string("clip_130_to_fp16_dtype_0"), val = string("fp16")]; + string var_2323_to_fp16_dtype_0 = const()[name = string("op_2323_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_130_to_fp16 = cast(dtype = 
clip_130_to_fp16_dtype_0, x = clip_130)[name = string("cast_329")]; + tensor var_2323_to_fp16 = cast(dtype = var_2323_to_fp16_dtype_0, x = var_2323)[name = string("cast_330")]; + tensor normed_output_165_cast_fp16 = mul(x = clip_130_to_fp16, y = var_2323_to_fp16)[name = string("normed_output_165_cast_fp16")]; + tensor const_73_to_fp16 = const()[name = string("const_73_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56463744)))]; + tensor normed_output_167_cast_fp16 = mul(x = normed_output_165_cast_fp16, y = const_73_to_fp16)[name = string("normed_output_167_cast_fp16")]; + tensor hidden_states_485_cast_fp16 = silu(x = normed_output_167_cast_fp16)[name = string("hidden_states_485_cast_fp16")]; + fp16 lconv1ds_4_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_4_linear_end_input_min_to_fp16"), val = fp16(-0x1.5cp+3)]; + fp16 lconv1ds_4_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_4_linear_end_input_max_to_fp16"), val = fp16(0x1.5ap+3)]; + tensor clip_131_cast_fp16 = clip(alpha = lconv1ds_4_linear_end_input_min_to_fp16, beta = lconv1ds_4_linear_end_input_max_to_fp16, x = hidden_states_485_cast_fp16)[name = string("clip_131_cast_fp16")]; + tensor lconv1ds_4_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56465856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56990208))))[name = string("lconv1ds_4_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_53_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_4_linear_end_linear_weight_to_fp16_palettized, x = clip_131_cast_fp16)[name = string("linear_53_cast_fp16")]; + fp16 lconv1ds_4_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_4_linear_end_output_min_to_fp16"), val = fp16(-0x1.f4p+2)]; + fp16 lconv1ds_4_linear_end_output_max_to_fp16 = 
const()[name = string("lconv1ds_4_linear_end_output_max_to_fp16"), val = fp16(0x1.fp+2)]; + tensor clip_132_cast_fp16 = clip(alpha = lconv1ds_4_linear_end_output_min_to_fp16, beta = lconv1ds_4_linear_end_output_max_to_fp16, x = linear_53_cast_fp16)[name = string("clip_132_cast_fp16")]; + tensor hidden_states_491_cast_fp16 = add(x = clip_132_cast_fp16, y = hidden_states_469_cast_fp16)[name = string("hidden_states_491_cast_fp16")]; + string hidden_states_491_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_491_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2347 = const()[name = string("op_2347"), val = fp32(-0x1p-1)]; + fp32 var_2348 = const()[name = string("op_2348"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_2349 = const()[name = string("op_2349"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_491_cast_fp16_to_fp32 = cast(dtype = hidden_states_491_cast_fp16_to_fp32_dtype_0, x = hidden_states_491_cast_fp16)[name = string("cast_328")]; + tensor clip_133 = clip(alpha = var_2349, beta = var_2348, x = hidden_states_491_cast_fp16_to_fp32)[name = string("clip_133")]; + fp32 var_2343_promoted = const()[name = string("op_2343_promoted"), val = fp32(0x1p+1)]; + tensor var_2357 = pow(x = clip_133, y = var_2343_promoted)[name = string("op_2357")]; + tensor var_2359_axes_0 = const()[name = string("op_2359_axes_0"), val = tensor([-1])]; + bool var_2359_keep_dims_0 = const()[name = string("op_2359_keep_dims_0"), val = bool(true)]; + tensor var_2359 = reduce_mean(axes = var_2359_axes_0, keep_dims = var_2359_keep_dims_0, x = var_2357)[name = string("op_2359")]; + string var_2359_to_fp16_dtype_0 = const()[name = string("op_2359_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2360_to_fp16 = const()[name = string("op_2360_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2359_to_fp16 = cast(dtype = var_2359_to_fp16_dtype_0, x = var_2359)[name = string("cast_327")]; + tensor mean_squared_85_cast_fp16 = add(x = var_2359_to_fp16, y = 
var_2360_to_fp16)[name = string("mean_squared_85_cast_fp16")]; + string mean_squared_85_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_85_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_85_cast_fp16_to_fp32 = cast(dtype = mean_squared_85_cast_fp16_to_fp32_dtype_0, x = mean_squared_85_cast_fp16)[name = string("cast_326")]; + tensor var_2362 = pow(x = mean_squared_85_cast_fp16_to_fp32, y = var_2347)[name = string("op_2362")]; + string clip_133_to_fp16_dtype_0 = const()[name = string("clip_133_to_fp16_dtype_0"), val = string("fp16")]; + string var_2362_to_fp16_dtype_0 = const()[name = string("op_2362_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_133_to_fp16 = cast(dtype = clip_133_to_fp16_dtype_0, x = clip_133)[name = string("cast_324")]; + tensor var_2362_to_fp16 = cast(dtype = var_2362_to_fp16_dtype_0, x = var_2362)[name = string("cast_325")]; + tensor normed_output_169_cast_fp16 = mul(x = clip_133_to_fp16, y = var_2362_to_fp16)[name = string("normed_output_169_cast_fp16")]; + tensor const_74_to_fp16 = const()[name = string("const_74_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56991296)))]; + tensor normed_output_171_cast_fp16 = mul(x = normed_output_169_cast_fp16, y = const_74_to_fp16)[name = string("normed_output_171_cast_fp16")]; + fp16 feed_forward2s_4_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.aap+3)]; + fp16 feed_forward2s_4_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.a6p+3)]; + tensor clip_134_cast_fp16 = clip(alpha = feed_forward2s_4_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_4_ffw_layer_1_input_max_to_fp16, x = normed_output_171_cast_fp16)[name = string("clip_134_cast_fp16")]; + tensor feed_forward2s_4_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56993408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59090624))))[name = string("feed_forward2s_4_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_54_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_4_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_134_cast_fp16)[name = string("linear_54_cast_fp16")]; + fp16 feed_forward2s_4_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.ep+4)]; + fp16 feed_forward2s_4_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.dcp+4)]; + tensor clip_135_cast_fp16 = clip(alpha = feed_forward2s_4_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_4_ffw_layer_1_output_max_to_fp16, x = linear_54_cast_fp16)[name = string("clip_135_cast_fp16")]; + tensor hidden_states_501_cast_fp16 = silu(x = clip_135_cast_fp16)[name = string("hidden_states_501_cast_fp16")]; + fp16 feed_forward2s_4_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.8ap+3)]; + fp16 feed_forward2s_4_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.88p+3)]; + tensor clip_136_cast_fp16 = clip(alpha = feed_forward2s_4_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_4_ffw_layer_2_input_max_to_fp16, x = hidden_states_501_cast_fp16)[name = string("clip_136_cast_fp16")]; + tensor feed_forward2s_4_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59094784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61192000))))[name = 
string("feed_forward2s_4_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_4_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_136_cast_fp16)[name = string("linear_55_cast_fp16")]; + fp16 feed_forward2s_4_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.2cp+6)]; + fp16 feed_forward2s_4_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_4_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.2ap+6)]; + tensor clip_137_cast_fp16 = clip(alpha = feed_forward2s_4_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_4_ffw_layer_2_output_max_to_fp16, x = linear_55_cast_fp16)[name = string("clip_137_cast_fp16")]; + string clip_137_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_137_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_137_cast_fp16_to_fp32 = cast(dtype = clip_137_cast_fp16_to_fp32_dtype_0, x = clip_137_cast_fp16)[name = string("cast_323")]; + tensor clip_138 = clip(alpha = var_2349, beta = var_2348, x = clip_137_cast_fp16_to_fp32)[name = string("clip_138")]; + fp32 var_2343_promoted_1 = const()[name = string("op_2343_promoted_1"), val = fp32(0x1p+1)]; + tensor var_2389 = pow(x = clip_138, y = var_2343_promoted_1)[name = string("op_2389")]; + tensor var_2391_axes_0 = const()[name = string("op_2391_axes_0"), val = tensor([-1])]; + bool var_2391_keep_dims_0 = const()[name = string("op_2391_keep_dims_0"), val = bool(true)]; + tensor var_2391 = reduce_mean(axes = var_2391_axes_0, keep_dims = var_2391_keep_dims_0, x = var_2389)[name = string("op_2391")]; + string var_2391_to_fp16_dtype_0 = const()[name = string("op_2391_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2392_to_fp16 = const()[name = string("op_2392_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2391_to_fp16 = cast(dtype = var_2391_to_fp16_dtype_0, x = var_2391)[name = string("cast_322")]; 
+ tensor mean_squared_87_cast_fp16 = add(x = var_2391_to_fp16, y = var_2392_to_fp16)[name = string("mean_squared_87_cast_fp16")]; + string mean_squared_87_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_87_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_87_cast_fp16_to_fp32 = cast(dtype = mean_squared_87_cast_fp16_to_fp32_dtype_0, x = mean_squared_87_cast_fp16)[name = string("cast_321")]; + tensor var_2394 = pow(x = mean_squared_87_cast_fp16_to_fp32, y = var_2347)[name = string("op_2394")]; + string clip_138_to_fp16_dtype_0 = const()[name = string("clip_138_to_fp16_dtype_0"), val = string("fp16")]; + string var_2394_to_fp16_dtype_0 = const()[name = string("op_2394_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_138_to_fp16 = cast(dtype = clip_138_to_fp16_dtype_0, x = clip_138)[name = string("cast_319")]; + tensor var_2394_to_fp16 = cast(dtype = var_2394_to_fp16_dtype_0, x = var_2394)[name = string("cast_320")]; + tensor normed_output_173_cast_fp16 = mul(x = clip_138_to_fp16, y = var_2394_to_fp16)[name = string("normed_output_173_cast_fp16")]; + tensor const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61193088)))]; + tensor normed_output_175_cast_fp16 = mul(x = normed_output_173_cast_fp16, y = const_75_to_fp16)[name = string("normed_output_175_cast_fp16")]; + fp16 var_2339_to_fp16 = const()[name = string("op_2339_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_513_cast_fp16 = mul(x = normed_output_175_cast_fp16, y = var_2339_to_fp16)[name = string("hidden_states_513_cast_fp16")]; + tensor hidden_states_515_cast_fp16 = add(x = hidden_states_513_cast_fp16, y = hidden_states_491_cast_fp16)[name = string("hidden_states_515_cast_fp16")]; + fp16 var_2401_to_fp16 = const()[name = string("op_2401_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_2402_to_fp16 = const()[name = string("op_2402_to_fp16"), val = 
fp16(0x1.ffcp+15)]; + tensor clip_139_cast_fp16 = clip(alpha = var_2401_to_fp16, beta = var_2402_to_fp16, x = hidden_states_515_cast_fp16)[name = string("clip_139_cast_fp16")]; + string clip_139_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_139_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2404 = const()[name = string("op_2404"), val = fp32(-0x1p-1)]; + fp32 var_2408_promoted = const()[name = string("op_2408_promoted"), val = fp32(0x1p+1)]; + tensor clip_139_cast_fp16_to_fp32 = cast(dtype = clip_139_cast_fp16_to_fp32_dtype_0, x = clip_139_cast_fp16)[name = string("cast_318")]; + tensor var_2414 = pow(x = clip_139_cast_fp16_to_fp32, y = var_2408_promoted)[name = string("op_2414")]; + tensor var_2416_axes_0 = const()[name = string("op_2416_axes_0"), val = tensor([-1])]; + bool var_2416_keep_dims_0 = const()[name = string("op_2416_keep_dims_0"), val = bool(true)]; + tensor var_2416 = reduce_mean(axes = var_2416_axes_0, keep_dims = var_2416_keep_dims_0, x = var_2414)[name = string("op_2416")]; + string var_2416_to_fp16_dtype_0 = const()[name = string("op_2416_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2417_to_fp16 = const()[name = string("op_2417_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2416_to_fp16 = cast(dtype = var_2416_to_fp16_dtype_0, x = var_2416)[name = string("cast_317")]; + tensor mean_squared_89_cast_fp16 = add(x = var_2416_to_fp16, y = var_2417_to_fp16)[name = string("mean_squared_89_cast_fp16")]; + string mean_squared_89_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_89_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_89_cast_fp16_to_fp32 = cast(dtype = mean_squared_89_cast_fp16_to_fp32_dtype_0, x = mean_squared_89_cast_fp16)[name = string("cast_316")]; + tensor var_2419 = pow(x = mean_squared_89_cast_fp16_to_fp32, y = var_2404)[name = string("op_2419")]; + string var_2419_to_fp16_dtype_0 = const()[name = string("op_2419_to_fp16_dtype_0"), val = string("fp16")]; + tensor 
var_2419_to_fp16 = cast(dtype = var_2419_to_fp16_dtype_0, x = var_2419)[name = string("cast_315")]; + tensor normed_output_177_cast_fp16 = mul(x = clip_139_cast_fp16, y = var_2419_to_fp16)[name = string("normed_output_177_cast_fp16")]; + tensor const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61195200)))]; + tensor normed_output_179_cast_fp16 = mul(x = normed_output_177_cast_fp16, y = const_76_to_fp16)[name = string("normed_output_179_cast_fp16")]; + string normed_output_179_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_179_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2432 = const()[name = string("op_2432"), val = fp32(-0x1p-1)]; + fp32 var_2433 = const()[name = string("op_2433"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_2434 = const()[name = string("op_2434"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_179_cast_fp16_to_fp32 = cast(dtype = normed_output_179_cast_fp16_to_fp32_dtype_0, x = normed_output_179_cast_fp16)[name = string("cast_314")]; + tensor clip_140 = clip(alpha = var_2434, beta = var_2433, x = normed_output_179_cast_fp16_to_fp32)[name = string("clip_140")]; + fp32 var_2428_promoted = const()[name = string("op_2428_promoted"), val = fp32(0x1p+1)]; + tensor var_2442 = pow(x = clip_140, y = var_2428_promoted)[name = string("op_2442")]; + tensor var_2444_axes_0 = const()[name = string("op_2444_axes_0"), val = tensor([-1])]; + bool var_2444_keep_dims_0 = const()[name = string("op_2444_keep_dims_0"), val = bool(true)]; + tensor var_2444 = reduce_mean(axes = var_2444_axes_0, keep_dims = var_2444_keep_dims_0, x = var_2442)[name = string("op_2444")]; + string var_2444_to_fp16_dtype_0 = const()[name = string("op_2444_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2444_to_fp16 = cast(dtype = var_2444_to_fp16_dtype_0, x = 
var_2444)[name = string("cast_313")]; + tensor mean_squared_91_cast_fp16 = add(x = var_2444_to_fp16, y = var_2445_to_fp16)[name = string("mean_squared_91_cast_fp16")]; + string mean_squared_91_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_91_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_91_cast_fp16_to_fp32 = cast(dtype = mean_squared_91_cast_fp16_to_fp32_dtype_0, x = mean_squared_91_cast_fp16)[name = string("cast_312")]; + tensor var_2447 = pow(x = mean_squared_91_cast_fp16_to_fp32, y = var_2432)[name = string("op_2447")]; + string clip_140_to_fp16_dtype_0 = const()[name = string("clip_140_to_fp16_dtype_0"), val = string("fp16")]; + string var_2447_to_fp16_dtype_0 = const()[name = string("op_2447_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_140_to_fp16 = cast(dtype = clip_140_to_fp16_dtype_0, x = clip_140)[name = string("cast_310")]; + tensor var_2447_to_fp16 = cast(dtype = var_2447_to_fp16_dtype_0, x = var_2447)[name = string("cast_311")]; + tensor normed_output_181_cast_fp16 = mul(x = clip_140_to_fp16, y = var_2447_to_fp16)[name = string("normed_output_181_cast_fp16")]; + tensor const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61197312)))]; + tensor normed_output_183_cast_fp16 = mul(x = normed_output_181_cast_fp16, y = const_77_to_fp16)[name = string("normed_output_183_cast_fp16")]; + fp16 feed_forward1s_5_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.9cp+3)]; + fp16 feed_forward1s_5_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.9ap+3)]; + tensor clip_141_cast_fp16 = clip(alpha = feed_forward1s_5_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_5_ffw_layer_1_input_max_to_fp16, x = normed_output_183_cast_fp16)[name = string("clip_141_cast_fp16")]; + tensor 
feed_forward1s_5_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61199424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63296640))))[name = string("feed_forward1s_5_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_5_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_141_cast_fp16)[name = string("linear_56_cast_fp16")]; + fp16 feed_forward1s_5_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.dcp+4)]; + fp16 feed_forward1s_5_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.d8p+4)]; + tensor clip_142_cast_fp16 = clip(alpha = feed_forward1s_5_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_5_ffw_layer_1_output_max_to_fp16, x = linear_56_cast_fp16)[name = string("clip_142_cast_fp16")]; + tensor hidden_states_531_cast_fp16 = silu(x = clip_142_cast_fp16)[name = string("hidden_states_531_cast_fp16")]; + fp16 feed_forward1s_5_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.4ep+3)]; + fp16 feed_forward1s_5_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.4cp+3)]; + tensor clip_143_cast_fp16 = clip(alpha = feed_forward1s_5_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_5_ffw_layer_2_input_max_to_fp16, x = hidden_states_531_cast_fp16)[name = string("clip_143_cast_fp16")]; + tensor feed_forward1s_5_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63300800))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(65398016))))[name = string("feed_forward1s_5_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_57_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_5_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_143_cast_fp16)[name = string("linear_57_cast_fp16")]; + fp16 feed_forward1s_5_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.d6p+5)]; + fp16 feed_forward1s_5_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_5_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.d2p+5)]; + tensor clip_144_cast_fp16 = clip(alpha = feed_forward1s_5_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_5_ffw_layer_2_output_max_to_fp16, x = linear_57_cast_fp16)[name = string("clip_144_cast_fp16")]; + string clip_144_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_144_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_144_cast_fp16_to_fp32 = cast(dtype = clip_144_cast_fp16_to_fp32_dtype_0, x = clip_144_cast_fp16)[name = string("cast_309")]; + tensor clip_145 = clip(alpha = var_2434, beta = var_2433, x = clip_144_cast_fp16_to_fp32)[name = string("clip_145")]; + fp32 var_2428_promoted_1 = const()[name = string("op_2428_promoted_1"), val = fp32(0x1p+1)]; + tensor var_2474 = pow(x = clip_145, y = var_2428_promoted_1)[name = string("op_2474")]; + tensor var_2476_axes_0 = const()[name = string("op_2476_axes_0"), val = tensor([-1])]; + bool var_2476_keep_dims_0 = const()[name = string("op_2476_keep_dims_0"), val = bool(true)]; + tensor var_2476 = reduce_mean(axes = var_2476_axes_0, keep_dims = var_2476_keep_dims_0, x = var_2474)[name = string("op_2476")]; + string var_2476_to_fp16_dtype_0 = const()[name = string("op_2476_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2476_to_fp16 = 
cast(dtype = var_2476_to_fp16_dtype_0, x = var_2476)[name = string("cast_308")]; + tensor mean_squared_93_cast_fp16 = add(x = var_2476_to_fp16, y = var_2477_to_fp16)[name = string("mean_squared_93_cast_fp16")]; + string mean_squared_93_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_93_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_93_cast_fp16_to_fp32 = cast(dtype = mean_squared_93_cast_fp16_to_fp32_dtype_0, x = mean_squared_93_cast_fp16)[name = string("cast_307")]; + tensor var_2479 = pow(x = mean_squared_93_cast_fp16_to_fp32, y = var_2432)[name = string("op_2479")]; + string clip_145_to_fp16_dtype_0 = const()[name = string("clip_145_to_fp16_dtype_0"), val = string("fp16")]; + string var_2479_to_fp16_dtype_0 = const()[name = string("op_2479_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_145_to_fp16 = cast(dtype = clip_145_to_fp16_dtype_0, x = clip_145)[name = string("cast_305")]; + tensor var_2479_to_fp16 = cast(dtype = var_2479_to_fp16_dtype_0, x = var_2479)[name = string("cast_306")]; + tensor normed_output_185_cast_fp16 = mul(x = clip_145_to_fp16, y = var_2479_to_fp16)[name = string("normed_output_185_cast_fp16")]; + tensor const_78_to_fp16 = const()[name = string("const_78_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65399104)))]; + tensor normed_output_187_cast_fp16 = mul(x = normed_output_185_cast_fp16, y = const_78_to_fp16)[name = string("normed_output_187_cast_fp16")]; + fp16 var_2424_to_fp16 = const()[name = string("op_2424_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_543_cast_fp16 = mul(x = normed_output_187_cast_fp16, y = var_2424_to_fp16)[name = string("hidden_states_543_cast_fp16")]; + tensor hidden_states_545_cast_fp16 = add(x = hidden_states_543_cast_fp16, y = normed_output_179_cast_fp16)[name = string("hidden_states_545_cast_fp16")]; + fp16 var_2486_to_fp16 = const()[name = string("op_2486_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 
var_2487_to_fp16 = const()[name = string("op_2487_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_146_cast_fp16 = clip(alpha = var_2486_to_fp16, beta = var_2487_to_fp16, x = hidden_states_545_cast_fp16)[name = string("clip_146_cast_fp16")]; + string clip_146_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_146_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2489 = const()[name = string("op_2489"), val = fp32(-0x1p-1)]; + fp32 var_2493_promoted = const()[name = string("op_2493_promoted"), val = fp32(0x1p+1)]; + tensor clip_146_cast_fp16_to_fp32 = cast(dtype = clip_146_cast_fp16_to_fp32_dtype_0, x = clip_146_cast_fp16)[name = string("cast_304")]; + tensor var_2499 = pow(x = clip_146_cast_fp16_to_fp32, y = var_2493_promoted)[name = string("op_2499")]; + tensor var_2501_axes_0 = const()[name = string("op_2501_axes_0"), val = tensor([-1])]; + bool var_2501_keep_dims_0 = const()[name = string("op_2501_keep_dims_0"), val = bool(true)]; + tensor var_2501 = reduce_mean(axes = var_2501_axes_0, keep_dims = var_2501_keep_dims_0, x = var_2499)[name = string("op_2501")]; + string var_2501_to_fp16_dtype_0 = const()[name = string("op_2501_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2502_to_fp16 = const()[name = string("op_2502_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2501_to_fp16 = cast(dtype = var_2501_to_fp16_dtype_0, x = var_2501)[name = string("cast_303")]; + tensor mean_squared_95_cast_fp16 = add(x = var_2501_to_fp16, y = var_2502_to_fp16)[name = string("mean_squared_95_cast_fp16")]; + string mean_squared_95_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_95_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_95_cast_fp16_to_fp32 = cast(dtype = mean_squared_95_cast_fp16_to_fp32_dtype_0, x = mean_squared_95_cast_fp16)[name = string("cast_302")]; + tensor var_2504 = pow(x = mean_squared_95_cast_fp16_to_fp32, y = var_2489)[name = string("op_2504")]; + string var_2504_to_fp16_dtype_0 = const()[name = 
string("op_2504_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2504_to_fp16 = cast(dtype = var_2504_to_fp16_dtype_0, x = var_2504)[name = string("cast_301")]; + tensor normed_output_189_cast_fp16 = mul(x = clip_146_cast_fp16, y = var_2504_to_fp16)[name = string("normed_output_189_cast_fp16")]; + tensor const_79_to_fp16 = const()[name = string("const_79_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65401216)))]; + tensor normed_output_191_cast_fp16 = mul(x = normed_output_189_cast_fp16, y = const_79_to_fp16)[name = string("normed_output_191_cast_fp16")]; + int32 var_2510 = const()[name = string("op_2510"), val = int32(-1)]; + fp32 var_2511 = const()[name = string("op_2511"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_5_q_proj_input_min_to_fp16 = const()[name = string("self_attns_5_q_proj_input_min_to_fp16"), val = fp16(-0x1.32p+3)]; + fp16 self_attns_5_q_proj_input_max_to_fp16 = const()[name = string("self_attns_5_q_proj_input_max_to_fp16"), val = fp16(0x1.3p+3)]; + tensor clip_147_cast_fp16 = clip(alpha = self_attns_5_q_proj_input_min_to_fp16, beta = self_attns_5_q_proj_input_max_to_fp16, x = normed_output_191_cast_fp16)[name = string("clip_147_cast_fp16")]; + tensor self_attns_5_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65403328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65927680))))[name = string("self_attns_5_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_58_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_5_q_proj_linear_weight_to_fp16_palettized, x = clip_147_cast_fp16)[name = string("linear_58_cast_fp16")]; + fp16 self_attns_5_q_proj_output_min_to_fp16 = const()[name = string("self_attns_5_q_proj_output_min_to_fp16"), val = fp16(-0x1.1p+4)]; + fp16 self_attns_5_q_proj_output_max_to_fp16 = const()[name = 
string("self_attns_5_q_proj_output_max_to_fp16"), val = fp16(0x1.0ep+4)]; + tensor clip_148_cast_fp16 = clip(alpha = self_attns_5_q_proj_output_min_to_fp16, beta = self_attns_5_q_proj_output_max_to_fp16, x = linear_58_cast_fp16)[name = string("clip_148_cast_fp16")]; + tensor var_2555 = const()[name = string("op_2555"), val = tensor([1, 50, 8, 128])]; + tensor q_11_cast_fp16 = reshape(shape = var_2555, x = clip_148_cast_fp16)[name = string("q_11_cast_fp16")]; + tensor self_attns_5_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65928768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66453120))))[name = string("self_attns_5_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_59_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_5_k_proj_linear_weight_to_fp16_palettized, x = clip_147_cast_fp16)[name = string("linear_59_cast_fp16")]; + fp16 self_attns_5_k_proj_output_min_to_fp16 = const()[name = string("self_attns_5_k_proj_output_min_to_fp16"), val = fp16(-0x1.1p+4)]; + fp16 self_attns_5_k_proj_output_max_to_fp16 = const()[name = string("self_attns_5_k_proj_output_max_to_fp16"), val = fp16(0x1.0ep+4)]; + tensor clip_150_cast_fp16 = clip(alpha = self_attns_5_k_proj_output_min_to_fp16, beta = self_attns_5_k_proj_output_max_to_fp16, x = linear_59_cast_fp16)[name = string("clip_150_cast_fp16")]; + tensor var_2567 = const()[name = string("op_2567"), val = tensor([1, 50, 8, 128])]; + tensor k_11_cast_fp16 = reshape(shape = var_2567, x = clip_150_cast_fp16)[name = string("k_11_cast_fp16")]; + tensor self_attns_5_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66454208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66978560))))[name = 
string("self_attns_5_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_60_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_5_v_proj_linear_weight_to_fp16_palettized, x = clip_147_cast_fp16)[name = string("linear_60_cast_fp16")]; + fp16 self_attns_5_v_proj_output_min_to_fp16 = const()[name = string("self_attns_5_v_proj_output_min_to_fp16"), val = fp16(-0x1.1p+4)]; + fp16 self_attns_5_v_proj_output_max_to_fp16 = const()[name = string("self_attns_5_v_proj_output_max_to_fp16"), val = fp16(0x1.0ep+4)]; + tensor clip_152_cast_fp16 = clip(alpha = self_attns_5_v_proj_output_min_to_fp16, beta = self_attns_5_v_proj_output_max_to_fp16, x = linear_60_cast_fp16)[name = string("clip_152_cast_fp16")]; + tensor var_2579 = const()[name = string("op_2579"), val = tensor([1, 50, 8, 128])]; + tensor input_243_cast_fp16 = reshape(shape = var_2579, x = clip_152_cast_fp16)[name = string("input_243_cast_fp16")]; + fp16 var_2581_to_fp16 = const()[name = string("op_2581_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_2582_cast_fp16 = mul(x = q_11_cast_fp16, y = var_2581_to_fp16)[name = string("op_2582_cast_fp16")]; + tensor var_2583_to_fp16 = const()[name = string("op_2583_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66979648)))]; + tensor input_239_cast_fp16 = mul(x = var_2582_cast_fp16, y = var_2583_to_fp16)[name = string("input_239_cast_fp16")]; + fp16 var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_241_cast_fp16 = mul(x = k_11_cast_fp16, y = var_2585_to_fp16)[name = string("input_241_cast_fp16")]; + tensor q_padded_11_pad_0 = const()[name = string("q_padded_11_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_11_mode_0 = const()[name = string("q_padded_11_mode_0"), val = string("constant")]; + fp16 const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_11_cast_fp16 = pad(constant_val = 
const_80_to_fp16, mode = q_padded_11_mode_0, pad = q_padded_11_pad_0, x = input_239_cast_fp16)[name = string("q_padded_11_cast_fp16")]; + tensor var_2589 = const()[name = string("op_2589"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_11_cast_fp16 = reshape(shape = var_2589, x = q_padded_11_cast_fp16)[name = string("q_blocks_11_cast_fp16")]; + tensor k_padded_11_pad_0 = const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; + fp16 const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_11_cast_fp16 = pad(constant_val = const_81_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_241_cast_fp16)[name = string("k_padded_11_cast_fp16")]; + tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; + fp16 const_82_to_fp16 = const()[name = string("const_82_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_11_cast_fp16 = pad(constant_val = const_82_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = input_243_cast_fp16)[name = string("v_padded_11_cast_fp16")]; + tensor var_2596_begin_0 = const()[name = string("op_2596_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2596_end_0 = const()[name = string("op_2596_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_2596_end_mask_0 = const()[name = string("op_2596_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2596_cast_fp16 = slice_by_index(begin = var_2596_begin_0, end = var_2596_end_0, end_mask = var_2596_end_mask_0, x = k_padded_11_cast_fp16)[name = string("op_2596_cast_fp16")]; + tensor var_2598_begin_0 = const()[name = string("op_2598_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_2598_end_0 = const()[name = 
string("op_2598_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_2598_end_mask_0 = const()[name = string("op_2598_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2598_cast_fp16 = slice_by_index(begin = var_2598_begin_0, end = var_2598_end_0, end_mask = var_2598_end_mask_0, x = k_padded_11_cast_fp16)[name = string("op_2598_cast_fp16")]; + tensor var_2600_begin_0 = const()[name = string("op_2600_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_2600_end_0 = const()[name = string("op_2600_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_2600_end_mask_0 = const()[name = string("op_2600_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2600_cast_fp16 = slice_by_index(begin = var_2600_begin_0, end = var_2600_end_0, end_mask = var_2600_end_mask_0, x = k_padded_11_cast_fp16)[name = string("op_2600_cast_fp16")]; + tensor var_2602_begin_0 = const()[name = string("op_2602_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_2602_end_0 = const()[name = string("op_2602_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_2602_end_mask_0 = const()[name = string("op_2602_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2602_cast_fp16 = slice_by_index(begin = var_2602_begin_0, end = var_2602_end_0, end_mask = var_2602_end_mask_0, x = k_padded_11_cast_fp16)[name = string("op_2602_cast_fp16")]; + tensor var_2604_begin_0 = const()[name = string("op_2604_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_2604_end_0 = const()[name = string("op_2604_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_2604_end_mask_0 = const()[name = string("op_2604_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2604_cast_fp16 = slice_by_index(begin = var_2604_begin_0, end = var_2604_end_0, end_mask = var_2604_end_mask_0, x = k_padded_11_cast_fp16)[name = string("op_2604_cast_fp16")]; + int32 k_blocks_11_axis_0 = const()[name = string("k_blocks_11_axis_0"), val = int32(1)]; + tensor 
k_blocks_11_cast_fp16 = stack(axis = k_blocks_11_axis_0, values = (var_2596_cast_fp16, var_2598_cast_fp16, var_2600_cast_fp16, var_2602_cast_fp16, var_2604_cast_fp16))[name = string("k_blocks_11_cast_fp16")]; + tensor var_2608_begin_0 = const()[name = string("op_2608_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2608_end_0 = const()[name = string("op_2608_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_2608_end_mask_0 = const()[name = string("op_2608_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2608_cast_fp16 = slice_by_index(begin = var_2608_begin_0, end = var_2608_end_0, end_mask = var_2608_end_mask_0, x = v_padded_11_cast_fp16)[name = string("op_2608_cast_fp16")]; + tensor var_2610_begin_0 = const()[name = string("op_2610_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_2610_end_0 = const()[name = string("op_2610_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_2610_end_mask_0 = const()[name = string("op_2610_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2610_cast_fp16 = slice_by_index(begin = var_2610_begin_0, end = var_2610_end_0, end_mask = var_2610_end_mask_0, x = v_padded_11_cast_fp16)[name = string("op_2610_cast_fp16")]; + tensor var_2612_begin_0 = const()[name = string("op_2612_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_2612_end_0 = const()[name = string("op_2612_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_2612_end_mask_0 = const()[name = string("op_2612_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2612_cast_fp16 = slice_by_index(begin = var_2612_begin_0, end = var_2612_end_0, end_mask = var_2612_end_mask_0, x = v_padded_11_cast_fp16)[name = string("op_2612_cast_fp16")]; + tensor var_2614_begin_0 = const()[name = string("op_2614_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_2614_end_0 = const()[name = string("op_2614_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_2614_end_mask_0 = const()[name = string("op_2614_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_2614_cast_fp16 = slice_by_index(begin = var_2614_begin_0, end = var_2614_end_0, end_mask = var_2614_end_mask_0, x = v_padded_11_cast_fp16)[name = string("op_2614_cast_fp16")]; + tensor var_2616_begin_0 = const()[name = string("op_2616_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_2616_end_0 = const()[name = string("op_2616_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_2616_end_mask_0 = const()[name = string("op_2616_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2616_cast_fp16 = slice_by_index(begin = var_2616_begin_0, end = var_2616_end_0, end_mask = var_2616_end_mask_0, x = v_padded_11_cast_fp16)[name = string("op_2616_cast_fp16")]; + int32 v_blocks_11_axis_0 = const()[name = string("v_blocks_11_axis_0"), val = int32(1)]; + tensor v_blocks_11_cast_fp16 = stack(axis = v_blocks_11_axis_0, values = (var_2608_cast_fp16, var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16))[name = string("v_blocks_11_cast_fp16")]; + tensor var_2624 = const()[name = string("op_2624"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_2626 = const()[name = string("op_2626"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_11_transpose_x_0 = const()[name = string("matrix_ac_11_transpose_x_0"), val = bool(false)]; + bool matrix_ac_11_transpose_y_0 = const()[name = string("matrix_ac_11_transpose_y_0"), val = bool(false)]; + tensor queries_11_cast_fp16 = transpose(perm = var_2624, x = q_blocks_11_cast_fp16)[name = string("transpose_40")]; + tensor keys_t_11_cast_fp16 = transpose(perm = var_2626, x = k_blocks_11_cast_fp16)[name = string("transpose_41")]; + tensor matrix_ac_11_cast_fp16 = matmul(transpose_x = matrix_ac_11_transpose_x_0, transpose_y = matrix_ac_11_transpose_y_0, x = queries_11_cast_fp16, y = keys_t_11_cast_fp16)[name = string("matrix_ac_11_cast_fp16")]; + tensor var_2629 = const()[name = string("op_2629"), val = tensor([1, 8, 60, 128])]; + tensor 
q_flat_11_cast_fp16 = reshape(shape = var_2629, x = queries_11_cast_fp16)[name = string("q_flat_11_cast_fp16")]; + bool matrix_bd_51_transpose_x_0 = const()[name = string("matrix_bd_51_transpose_x_0"), val = bool(false)]; + bool matrix_bd_51_transpose_y_0 = const()[name = string("matrix_bd_51_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_11_to_fp16 = const()[name = string("rel_k_t_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66979968)))]; + tensor matrix_bd_51_cast_fp16 = matmul(transpose_x = matrix_bd_51_transpose_x_0, transpose_y = matrix_bd_51_transpose_y_0, x = q_flat_11_cast_fp16, y = rel_k_t_11_to_fp16)[name = string("matrix_bd_51_cast_fp16")]; + tensor var_2634 = const()[name = string("op_2634"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_245_cast_fp16 = reshape(shape = var_2634, x = matrix_bd_51_cast_fp16)[name = string("input_245_cast_fp16")]; + tensor matrix_bd_53_pad_0 = const()[name = string("matrix_bd_53_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67006656)))]; + string matrix_bd_53_mode_0 = const()[name = string("matrix_bd_53_mode_0"), val = string("constant")]; + fp16 const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_53_cast_fp16 = pad(constant_val = const_84_to_fp16, mode = matrix_bd_53_mode_0, pad = matrix_bd_53_pad_0, x = input_245_cast_fp16)[name = string("matrix_bd_53_cast_fp16")]; + tensor var_2638 = const()[name = string("op_2638"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_55_cast_fp16 = reshape(shape = var_2638, x = matrix_bd_53_cast_fp16)[name = string("matrix_bd_55_cast_fp16")]; + tensor matrix_bd_57_begin_0 = const()[name = string("matrix_bd_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_57_end_0 = const()[name = string("matrix_bd_57_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_57_end_mask_0 = const()[name = 
string("matrix_bd_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_57_cast_fp16 = slice_by_index(begin = matrix_bd_57_begin_0, end = matrix_bd_57_end_0, end_mask = matrix_bd_57_end_mask_0, x = matrix_bd_55_cast_fp16)[name = string("matrix_bd_57_cast_fp16")]; + tensor var_2644 = const()[name = string("op_2644"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_59_cast_fp16 = reshape(shape = var_2644, x = matrix_bd_57_cast_fp16)[name = string("matrix_bd_59_cast_fp16")]; + tensor attn_31_cast_fp16 = add(x = matrix_ac_11_cast_fp16, y = matrix_bd_59_cast_fp16)[name = string("attn_31_cast_fp16")]; + fp16 _inversed_2647_y_0_to_fp16 = const()[name = string("_inversed_2647_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_2647_cast_fp16 = mul(x = attn_31_cast_fp16, y = _inversed_2647_y_0_to_fp16)[name = string("_inversed_2647_cast_fp16")]; + string _inversed_2647_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_2647_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_2647_cast_fp16_to_fp32 = cast(dtype = _inversed_2647_cast_fp16_to_fp32_dtype_0, x = _inversed_2647_cast_fp16)[name = string("cast_300")]; + tensor var_2648 = tanh(x = _inversed_2647_cast_fp16_to_fp32)[name = string("op_2648")]; + string var_2648_to_fp16_dtype_0 = const()[name = string("op_2648_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_5_softcap_to_fp16 = const()[name = string("self_attns_5_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_2648_to_fp16 = cast(dtype = var_2648_to_fp16_dtype_0, x = var_2648)[name = string("cast_299")]; + tensor attn_33_cast_fp16 = mul(x = var_2648_to_fp16, y = self_attns_5_softcap_to_fp16)[name = string("attn_33_cast_fp16")]; + string attn_33_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_33_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_33_cast_fp16_to_fp32 = cast(dtype = attn_33_cast_fp16_to_fp32_dtype_0, x = attn_33_cast_fp16)[name = string("cast_298")]; + 
tensor input_247 = select(a = var_2511, b = attn_33_cast_fp16_to_fp32, cond = var_460)[name = string("input_247")]; + tensor var_2652 = softmax(axis = var_2510, x = input_247)[name = string("op_2652")]; + tensor var_2654 = const()[name = string("op_2654"), val = tensor([0, 3, 1, -3, -1])]; + bool out_31_transpose_x_0 = const()[name = string("out_31_transpose_x_0"), val = bool(false)]; + bool out_31_transpose_y_0 = const()[name = string("out_31_transpose_y_0"), val = bool(false)]; + string var_2652_to_fp16_dtype_0 = const()[name = string("op_2652_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_11_cast_fp16 = transpose(perm = var_2654, x = v_blocks_11_cast_fp16)[name = string("transpose_39")]; + tensor var_2652_to_fp16 = cast(dtype = var_2652_to_fp16_dtype_0, x = var_2652)[name = string("cast_297")]; + tensor out_31_cast_fp16 = matmul(transpose_x = out_31_transpose_x_0, transpose_y = out_31_transpose_y_0, x = var_2652_to_fp16, y = values_t_11_cast_fp16)[name = string("out_31_cast_fp16")]; + tensor var_2657 = const()[name = string("op_2657"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_2659 = const()[name = string("op_2659"), val = tensor([1, 60, 1024])]; + tensor var_2658_cast_fp16 = transpose(perm = var_2657, x = out_31_cast_fp16)[name = string("transpose_38")]; + tensor out_33_cast_fp16 = reshape(shape = var_2659, x = var_2658_cast_fp16)[name = string("out_33_cast_fp16")]; + tensor var_2662_begin_0 = const()[name = string("op_2662_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2662_end_0 = const()[name = string("op_2662_end_0"), val = tensor([1, 50, 1024])]; + tensor var_2662_end_mask_0 = const()[name = string("op_2662_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2662_cast_fp16 = slice_by_index(begin = var_2662_begin_0, end = var_2662_end_0, end_mask = var_2662_end_mask_0, x = out_33_cast_fp16)[name = string("op_2662_cast_fp16")]; + fp16 self_attns_5_post_input_min_to_fp16 = const()[name = 
string("self_attns_5_post_input_min_to_fp16"), val = fp16(-0x1.f6p+3)]; + fp16 self_attns_5_post_input_max_to_fp16 = const()[name = string("self_attns_5_post_input_max_to_fp16"), val = fp16(0x1.f2p+3)]; + tensor clip_153_cast_fp16 = clip(alpha = self_attns_5_post_input_min_to_fp16, beta = self_attns_5_post_input_max_to_fp16, x = var_2662_cast_fp16)[name = string("clip_153_cast_fp16")]; + tensor self_attns_5_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67006784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67531136))))[name = string("self_attns_5_post_linear_weight_to_fp16_palettized")]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_5_post_linear_weight_to_fp16_palettized, x = clip_153_cast_fp16)[name = string("linear_62_cast_fp16")]; + fp16 self_attns_5_post_output_min_to_fp16 = const()[name = string("self_attns_5_post_output_min_to_fp16"), val = fp16(-0x1.96p+5)]; + fp16 self_attns_5_post_output_max_to_fp16 = const()[name = string("self_attns_5_post_output_max_to_fp16"), val = fp16(0x1.94p+5)]; + tensor clip_154_cast_fp16 = clip(alpha = self_attns_5_post_output_min_to_fp16, beta = self_attns_5_post_output_max_to_fp16, x = linear_62_cast_fp16)[name = string("clip_154_cast_fp16")]; + fp16 var_2674_to_fp16 = const()[name = string("op_2674_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_2675_to_fp16 = const()[name = string("op_2675_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_155_cast_fp16 = clip(alpha = var_2674_to_fp16, beta = var_2675_to_fp16, x = clip_154_cast_fp16)[name = string("clip_155_cast_fp16")]; + string clip_155_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_155_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2677 = const()[name = string("op_2677"), val = fp32(-0x1p-1)]; + fp32 var_2681_promoted = const()[name = 
string("op_2681_promoted"), val = fp32(0x1p+1)]; + tensor clip_155_cast_fp16_to_fp32 = cast(dtype = clip_155_cast_fp16_to_fp32_dtype_0, x = clip_155_cast_fp16)[name = string("cast_296")]; + tensor var_2687 = pow(x = clip_155_cast_fp16_to_fp32, y = var_2681_promoted)[name = string("op_2687")]; + tensor var_2689_axes_0 = const()[name = string("op_2689_axes_0"), val = tensor([-1])]; + bool var_2689_keep_dims_0 = const()[name = string("op_2689_keep_dims_0"), val = bool(true)]; + tensor var_2689 = reduce_mean(axes = var_2689_axes_0, keep_dims = var_2689_keep_dims_0, x = var_2687)[name = string("op_2689")]; + string var_2689_to_fp16_dtype_0 = const()[name = string("op_2689_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2690_to_fp16 = const()[name = string("op_2690_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2689_to_fp16 = cast(dtype = var_2689_to_fp16_dtype_0, x = var_2689)[name = string("cast_295")]; + tensor mean_squared_97_cast_fp16 = add(x = var_2689_to_fp16, y = var_2690_to_fp16)[name = string("mean_squared_97_cast_fp16")]; + string mean_squared_97_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_97_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_97_cast_fp16_to_fp32 = cast(dtype = mean_squared_97_cast_fp16_to_fp32_dtype_0, x = mean_squared_97_cast_fp16)[name = string("cast_294")]; + tensor var_2692 = pow(x = mean_squared_97_cast_fp16_to_fp32, y = var_2677)[name = string("op_2692")]; + string var_2692_to_fp16_dtype_0 = const()[name = string("op_2692_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2692_to_fp16 = cast(dtype = var_2692_to_fp16_dtype_0, x = var_2692)[name = string("cast_293")]; + tensor normed_output_193_cast_fp16 = mul(x = clip_155_cast_fp16, y = var_2692_to_fp16)[name = string("normed_output_193_cast_fp16")]; + tensor const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67532224)))]; + tensor 
normed_output_195_cast_fp16 = mul(x = normed_output_193_cast_fp16, y = const_85_to_fp16)[name = string("normed_output_195_cast_fp16")]; + tensor hidden_states_571_cast_fp16 = add(x = normed_output_195_cast_fp16, y = hidden_states_545_cast_fp16)[name = string("hidden_states_571_cast_fp16")]; + string hidden_states_571_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_571_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2699 = const()[name = string("op_2699"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_2700 = const()[name = string("op_2700"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_2712 = const()[name = string("op_2712"), val = fp32(-0x1p-1)]; + fp32 var_2708_promoted = const()[name = string("op_2708_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_571_cast_fp16_to_fp32 = cast(dtype = hidden_states_571_cast_fp16_to_fp32_dtype_0, x = hidden_states_571_cast_fp16)[name = string("cast_292")]; + tensor var_2720 = pow(x = hidden_states_571_cast_fp16_to_fp32, y = var_2708_promoted)[name = string("op_2720")]; + tensor var_2722_axes_0 = const()[name = string("op_2722_axes_0"), val = tensor([-1])]; + bool var_2722_keep_dims_0 = const()[name = string("op_2722_keep_dims_0"), val = bool(true)]; + tensor var_2722 = reduce_mean(axes = var_2722_axes_0, keep_dims = var_2722_keep_dims_0, x = var_2720)[name = string("op_2722")]; + string var_2722_to_fp16_dtype_0 = const()[name = string("op_2722_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2723_to_fp16 = const()[name = string("op_2723_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2722_to_fp16 = cast(dtype = var_2722_to_fp16_dtype_0, x = var_2722)[name = string("cast_291")]; + tensor mean_squared_99_cast_fp16 = add(x = var_2722_to_fp16, y = var_2723_to_fp16)[name = string("mean_squared_99_cast_fp16")]; + string mean_squared_99_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_99_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_99_cast_fp16_to_fp32 = 
cast(dtype = mean_squared_99_cast_fp16_to_fp32_dtype_0, x = mean_squared_99_cast_fp16)[name = string("cast_290")]; + tensor var_2725 = pow(x = mean_squared_99_cast_fp16_to_fp32, y = var_2712)[name = string("op_2725")]; + string var_2725_to_fp16_dtype_0 = const()[name = string("op_2725_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2725_to_fp16 = cast(dtype = var_2725_to_fp16_dtype_0, x = var_2725)[name = string("cast_289")]; + tensor normed_output_197_cast_fp16 = mul(x = hidden_states_571_cast_fp16, y = var_2725_to_fp16)[name = string("normed_output_197_cast_fp16")]; + tensor const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67534336)))]; + tensor normed_output_199_cast_fp16 = mul(x = normed_output_197_cast_fp16, y = const_86_to_fp16)[name = string("normed_output_199_cast_fp16")]; + fp16 lconv1ds_5_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_5_linear_start_input_min_to_fp16"), val = fp16(-0x1.7p+3)]; + fp16 lconv1ds_5_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_5_linear_start_input_max_to_fp16"), val = fp16(0x1.6ep+3)]; + tensor clip_156_cast_fp16 = clip(alpha = lconv1ds_5_linear_start_input_min_to_fp16, beta = lconv1ds_5_linear_start_input_max_to_fp16, x = normed_output_199_cast_fp16)[name = string("clip_156_cast_fp16")]; + tensor lconv1ds_5_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67536448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68585088))))[name = string("lconv1ds_5_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_63_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_5_linear_start_linear_weight_to_fp16_palettized, x = clip_156_cast_fp16)[name = string("linear_63_cast_fp16")]; + fp16 
lconv1ds_5_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_5_linear_start_output_min_to_fp16"), val = fp16(-0x1.92p+4)]; + fp16 lconv1ds_5_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_5_linear_start_output_max_to_fp16"), val = fp16(0x1.8ep+4)]; + tensor clip_157_cast_fp16 = clip(alpha = lconv1ds_5_linear_start_output_min_to_fp16, beta = lconv1ds_5_linear_start_output_max_to_fp16, x = linear_63_cast_fp16)[name = string("clip_157_cast_fp16")]; + int32 hidden_states_579_split_num_splits_0 = const()[name = string("hidden_states_579_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_579_split_axis_0 = const()[name = string("hidden_states_579_split_axis_0"), val = int32(-1)]; + tensor hidden_states_579_split_cast_fp16_0, tensor hidden_states_579_split_cast_fp16_1 = split(axis = hidden_states_579_split_axis_0, num_splits = hidden_states_579_split_num_splits_0, x = clip_157_cast_fp16)[name = string("hidden_states_579_split_cast_fp16")]; + tensor hidden_states_579_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_579_split_cast_fp16_1)[name = string("hidden_states_579_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_579_cast_fp16 = mul(x = hidden_states_579_split_cast_fp16_0, y = hidden_states_579_split_1_sigmoid_cast_fp16)[name = string("hidden_states_579_cast_fp16")]; + tensor input_255_perm_0 = const()[name = string("input_255_perm_0"), val = tensor([0, 2, 1])]; + tensor input_257_pad_0 = const()[name = string("input_257_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_257_mode_0 = const()[name = string("input_257_mode_0"), val = string("constant")]; + fp16 const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = fp16(0x0p+0)]; + tensor input_255_cast_fp16 = transpose(perm = input_255_perm_0, x = hidden_states_579_cast_fp16)[name = string("transpose_37")]; + tensor input_257_cast_fp16 = pad(constant_val = const_87_to_fp16, mode = input_257_mode_0, pad = input_257_pad_0, x = 
input_255_cast_fp16)[name = string("input_257_cast_fp16")]; + string var_2751_pad_type_0 = const()[name = string("op_2751_pad_type_0"), val = string("valid")]; + int32 var_2751_groups_0 = const()[name = string("op_2751_groups_0"), val = int32(1024)]; + tensor var_2751_strides_0 = const()[name = string("op_2751_strides_0"), val = tensor([1])]; + tensor var_2751_pad_0 = const()[name = string("op_2751_pad_0"), val = tensor([0, 0])]; + tensor var_2751_dilations_0 = const()[name = string("op_2751_dilations_0"), val = tensor([1])]; + tensor lconv1ds_5_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68587200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68589824))))[name = string("lconv1ds_5_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_2751_cast_fp16 = conv(dilations = var_2751_dilations_0, groups = var_2751_groups_0, pad = var_2751_pad_0, pad_type = var_2751_pad_type_0, strides = var_2751_strides_0, weight = lconv1ds_5_depthwise_conv1d_weight_to_fp16_palettized, x = input_257_cast_fp16)[name = string("op_2751_cast_fp16")]; + tensor hidden_states_581_perm_0 = const()[name = string("hidden_states_581_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_581_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_581_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_581_cast_fp16 = transpose(perm = hidden_states_581_perm_0, x = var_2751_cast_fp16)[name = string("transpose_36")]; + tensor hidden_states_581_cast_fp16_to_fp32 = cast(dtype = hidden_states_581_cast_fp16_to_fp32_dtype_0, x = hidden_states_581_cast_fp16)[name = string("cast_288")]; + tensor clip_158 = clip(alpha = var_2700, beta = var_2699, x = hidden_states_581_cast_fp16_to_fp32)[name = string("clip_158")]; + fp32 var_2708_promoted_1 = const()[name = string("op_2708_promoted_1"), val = fp32(0x1p+1)]; + 
tensor var_2756 = pow(x = clip_158, y = var_2708_promoted_1)[name = string("op_2756")]; + tensor var_2758_axes_0 = const()[name = string("op_2758_axes_0"), val = tensor([-1])]; + bool var_2758_keep_dims_0 = const()[name = string("op_2758_keep_dims_0"), val = bool(true)]; + tensor var_2758 = reduce_mean(axes = var_2758_axes_0, keep_dims = var_2758_keep_dims_0, x = var_2756)[name = string("op_2758")]; + string var_2758_to_fp16_dtype_0 = const()[name = string("op_2758_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2759_to_fp16 = const()[name = string("op_2759_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2758_to_fp16 = cast(dtype = var_2758_to_fp16_dtype_0, x = var_2758)[name = string("cast_287")]; + tensor mean_squared_101_cast_fp16 = add(x = var_2758_to_fp16, y = var_2759_to_fp16)[name = string("mean_squared_101_cast_fp16")]; + string mean_squared_101_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_101_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_101_cast_fp16_to_fp32 = cast(dtype = mean_squared_101_cast_fp16_to_fp32_dtype_0, x = mean_squared_101_cast_fp16)[name = string("cast_286")]; + tensor var_2761 = pow(x = mean_squared_101_cast_fp16_to_fp32, y = var_2712)[name = string("op_2761")]; + string clip_158_to_fp16_dtype_0 = const()[name = string("clip_158_to_fp16_dtype_0"), val = string("fp16")]; + string var_2761_to_fp16_dtype_0 = const()[name = string("op_2761_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_158_to_fp16 = cast(dtype = clip_158_to_fp16_dtype_0, x = clip_158)[name = string("cast_284")]; + tensor var_2761_to_fp16 = cast(dtype = var_2761_to_fp16_dtype_0, x = var_2761)[name = string("cast_285")]; + tensor normed_output_201_cast_fp16 = mul(x = clip_158_to_fp16, y = var_2761_to_fp16)[name = string("normed_output_201_cast_fp16")]; + tensor const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(68590912)))]; + tensor normed_output_203_cast_fp16 = mul(x = normed_output_201_cast_fp16, y = const_88_to_fp16)[name = string("normed_output_203_cast_fp16")]; + tensor hidden_states_587_cast_fp16 = silu(x = normed_output_203_cast_fp16)[name = string("hidden_states_587_cast_fp16")]; + fp16 lconv1ds_5_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_5_linear_end_input_min_to_fp16"), val = fp16(-0x1.bep+3)]; + fp16 lconv1ds_5_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_5_linear_end_input_max_to_fp16"), val = fp16(0x1.bap+3)]; + tensor clip_159_cast_fp16 = clip(alpha = lconv1ds_5_linear_end_input_min_to_fp16, beta = lconv1ds_5_linear_end_input_max_to_fp16, x = hidden_states_587_cast_fp16)[name = string("clip_159_cast_fp16")]; + tensor lconv1ds_5_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68593024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69117376))))[name = string("lconv1ds_5_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_64_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_5_linear_end_linear_weight_to_fp16_palettized, x = clip_159_cast_fp16)[name = string("linear_64_cast_fp16")]; + fp16 lconv1ds_5_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_5_linear_end_output_min_to_fp16"), val = fp16(-0x1.02p+3)]; + fp16 lconv1ds_5_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_5_linear_end_output_max_to_fp16"), val = fp16(0x1p+3)]; + tensor clip_160_cast_fp16 = clip(alpha = lconv1ds_5_linear_end_output_min_to_fp16, beta = lconv1ds_5_linear_end_output_max_to_fp16, x = linear_64_cast_fp16)[name = string("clip_160_cast_fp16")]; + tensor hidden_states_593_cast_fp16 = add(x = clip_160_cast_fp16, y = hidden_states_571_cast_fp16)[name = string("hidden_states_593_cast_fp16")]; + string 
hidden_states_593_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_593_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2785 = const()[name = string("op_2785"), val = fp32(-0x1p-1)]; + fp32 var_2786 = const()[name = string("op_2786"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_2787 = const()[name = string("op_2787"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_593_cast_fp16_to_fp32 = cast(dtype = hidden_states_593_cast_fp16_to_fp32_dtype_0, x = hidden_states_593_cast_fp16)[name = string("cast_283")]; + tensor clip_161 = clip(alpha = var_2787, beta = var_2786, x = hidden_states_593_cast_fp16_to_fp32)[name = string("clip_161")]; + fp32 var_2781_promoted = const()[name = string("op_2781_promoted"), val = fp32(0x1p+1)]; + tensor var_2795 = pow(x = clip_161, y = var_2781_promoted)[name = string("op_2795")]; + tensor var_2797_axes_0 = const()[name = string("op_2797_axes_0"), val = tensor([-1])]; + bool var_2797_keep_dims_0 = const()[name = string("op_2797_keep_dims_0"), val = bool(true)]; + tensor var_2797 = reduce_mean(axes = var_2797_axes_0, keep_dims = var_2797_keep_dims_0, x = var_2795)[name = string("op_2797")]; + string var_2797_to_fp16_dtype_0 = const()[name = string("op_2797_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2798_to_fp16 = const()[name = string("op_2798_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2797_to_fp16 = cast(dtype = var_2797_to_fp16_dtype_0, x = var_2797)[name = string("cast_282")]; + tensor mean_squared_103_cast_fp16 = add(x = var_2797_to_fp16, y = var_2798_to_fp16)[name = string("mean_squared_103_cast_fp16")]; + string mean_squared_103_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_103_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_103_cast_fp16_to_fp32 = cast(dtype = mean_squared_103_cast_fp16_to_fp32_dtype_0, x = mean_squared_103_cast_fp16)[name = string("cast_281")]; + tensor var_2800 = pow(x = mean_squared_103_cast_fp16_to_fp32, y = var_2785)[name 
= string("op_2800")]; + string clip_161_to_fp16_dtype_0 = const()[name = string("clip_161_to_fp16_dtype_0"), val = string("fp16")]; + string var_2800_to_fp16_dtype_0 = const()[name = string("op_2800_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_161_to_fp16 = cast(dtype = clip_161_to_fp16_dtype_0, x = clip_161)[name = string("cast_279")]; + tensor var_2800_to_fp16 = cast(dtype = var_2800_to_fp16_dtype_0, x = var_2800)[name = string("cast_280")]; + tensor normed_output_205_cast_fp16 = mul(x = clip_161_to_fp16, y = var_2800_to_fp16)[name = string("normed_output_205_cast_fp16")]; + tensor const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69118464)))]; + tensor normed_output_207_cast_fp16 = mul(x = normed_output_205_cast_fp16, y = const_89_to_fp16)[name = string("normed_output_207_cast_fp16")]; + fp16 feed_forward2s_5_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.9ep+3)]; + fp16 feed_forward2s_5_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.9cp+3)]; + tensor clip_162_cast_fp16 = clip(alpha = feed_forward2s_5_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_5_ffw_layer_1_input_max_to_fp16, x = normed_output_207_cast_fp16)[name = string("clip_162_cast_fp16")]; + tensor feed_forward2s_5_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69120576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71217792))))[name = string("feed_forward2s_5_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_5_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_162_cast_fp16)[name = 
string("linear_65_cast_fp16")]; + fp16 feed_forward2s_5_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.f4p+4)]; + fp16 feed_forward2s_5_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.fp+4)]; + tensor clip_163_cast_fp16 = clip(alpha = feed_forward2s_5_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_5_ffw_layer_1_output_max_to_fp16, x = linear_65_cast_fp16)[name = string("clip_163_cast_fp16")]; + tensor hidden_states_603_cast_fp16 = silu(x = clip_163_cast_fp16)[name = string("hidden_states_603_cast_fp16")]; + fp16 feed_forward2s_5_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.1p+3)]; + fp16 feed_forward2s_5_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.0ep+3)]; + tensor clip_164_cast_fp16 = clip(alpha = feed_forward2s_5_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_5_ffw_layer_2_input_max_to_fp16, x = hidden_states_603_cast_fp16)[name = string("clip_164_cast_fp16")]; + tensor feed_forward2s_5_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71221952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73319168))))[name = string("feed_forward2s_5_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_66_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_5_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_164_cast_fp16)[name = string("linear_66_cast_fp16")]; + fp16 feed_forward2s_5_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_5_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.2p+5)]; + fp16 feed_forward2s_5_ffw_layer_2_output_max_to_fp16 = 
const()[name = string("feed_forward2s_5_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.1ep+5)]; + tensor clip_165_cast_fp16 = clip(alpha = feed_forward2s_5_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_5_ffw_layer_2_output_max_to_fp16, x = linear_66_cast_fp16)[name = string("clip_165_cast_fp16")]; + string clip_165_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_165_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_165_cast_fp16_to_fp32 = cast(dtype = clip_165_cast_fp16_to_fp32_dtype_0, x = clip_165_cast_fp16)[name = string("cast_278")]; + tensor clip_166 = clip(alpha = var_2787, beta = var_2786, x = clip_165_cast_fp16_to_fp32)[name = string("clip_166")]; + fp32 var_2781_promoted_1 = const()[name = string("op_2781_promoted_1"), val = fp32(0x1p+1)]; + tensor var_2827 = pow(x = clip_166, y = var_2781_promoted_1)[name = string("op_2827")]; + tensor var_2829_axes_0 = const()[name = string("op_2829_axes_0"), val = tensor([-1])]; + bool var_2829_keep_dims_0 = const()[name = string("op_2829_keep_dims_0"), val = bool(true)]; + tensor var_2829 = reduce_mean(axes = var_2829_axes_0, keep_dims = var_2829_keep_dims_0, x = var_2827)[name = string("op_2829")]; + string var_2829_to_fp16_dtype_0 = const()[name = string("op_2829_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2830_to_fp16 = const()[name = string("op_2830_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2829_to_fp16 = cast(dtype = var_2829_to_fp16_dtype_0, x = var_2829)[name = string("cast_277")]; + tensor mean_squared_105_cast_fp16 = add(x = var_2829_to_fp16, y = var_2830_to_fp16)[name = string("mean_squared_105_cast_fp16")]; + string mean_squared_105_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_105_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_105_cast_fp16_to_fp32 = cast(dtype = mean_squared_105_cast_fp16_to_fp32_dtype_0, x = mean_squared_105_cast_fp16)[name = string("cast_276")]; + tensor var_2832 = pow(x = 
mean_squared_105_cast_fp16_to_fp32, y = var_2785)[name = string("op_2832")]; + string clip_166_to_fp16_dtype_0 = const()[name = string("clip_166_to_fp16_dtype_0"), val = string("fp16")]; + string var_2832_to_fp16_dtype_0 = const()[name = string("op_2832_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_166_to_fp16 = cast(dtype = clip_166_to_fp16_dtype_0, x = clip_166)[name = string("cast_274")]; + tensor var_2832_to_fp16 = cast(dtype = var_2832_to_fp16_dtype_0, x = var_2832)[name = string("cast_275")]; + tensor normed_output_209_cast_fp16 = mul(x = clip_166_to_fp16, y = var_2832_to_fp16)[name = string("normed_output_209_cast_fp16")]; + tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73320256)))]; + tensor normed_output_211_cast_fp16 = mul(x = normed_output_209_cast_fp16, y = const_90_to_fp16)[name = string("normed_output_211_cast_fp16")]; + fp16 var_2777_to_fp16 = const()[name = string("op_2777_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_615_cast_fp16 = mul(x = normed_output_211_cast_fp16, y = var_2777_to_fp16)[name = string("hidden_states_615_cast_fp16")]; + tensor hidden_states_617_cast_fp16 = add(x = hidden_states_615_cast_fp16, y = hidden_states_593_cast_fp16)[name = string("hidden_states_617_cast_fp16")]; + fp16 var_2839_to_fp16 = const()[name = string("op_2839_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_2840_to_fp16 = const()[name = string("op_2840_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_167_cast_fp16 = clip(alpha = var_2839_to_fp16, beta = var_2840_to_fp16, x = hidden_states_617_cast_fp16)[name = string("clip_167_cast_fp16")]; + string clip_167_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_167_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2842 = const()[name = string("op_2842"), val = fp32(-0x1p-1)]; + fp32 var_2846_promoted = const()[name = string("op_2846_promoted"), val = fp32(0x1p+1)]; + 
tensor clip_167_cast_fp16_to_fp32 = cast(dtype = clip_167_cast_fp16_to_fp32_dtype_0, x = clip_167_cast_fp16)[name = string("cast_273")]; + tensor var_2852 = pow(x = clip_167_cast_fp16_to_fp32, y = var_2846_promoted)[name = string("op_2852")]; + tensor var_2854_axes_0 = const()[name = string("op_2854_axes_0"), val = tensor([-1])]; + bool var_2854_keep_dims_0 = const()[name = string("op_2854_keep_dims_0"), val = bool(true)]; + tensor var_2854 = reduce_mean(axes = var_2854_axes_0, keep_dims = var_2854_keep_dims_0, x = var_2852)[name = string("op_2854")]; + string var_2854_to_fp16_dtype_0 = const()[name = string("op_2854_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2855_to_fp16 = const()[name = string("op_2855_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2854_to_fp16 = cast(dtype = var_2854_to_fp16_dtype_0, x = var_2854)[name = string("cast_272")]; + tensor mean_squared_107_cast_fp16 = add(x = var_2854_to_fp16, y = var_2855_to_fp16)[name = string("mean_squared_107_cast_fp16")]; + string mean_squared_107_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_107_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_107_cast_fp16_to_fp32 = cast(dtype = mean_squared_107_cast_fp16_to_fp32_dtype_0, x = mean_squared_107_cast_fp16)[name = string("cast_271")]; + tensor var_2857 = pow(x = mean_squared_107_cast_fp16_to_fp32, y = var_2842)[name = string("op_2857")]; + string var_2857_to_fp16_dtype_0 = const()[name = string("op_2857_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2857_to_fp16 = cast(dtype = var_2857_to_fp16_dtype_0, x = var_2857)[name = string("cast_270")]; + tensor normed_output_213_cast_fp16 = mul(x = clip_167_cast_fp16, y = var_2857_to_fp16)[name = string("normed_output_213_cast_fp16")]; + tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73322368)))]; + tensor normed_output_215_cast_fp16 = mul(x = 
normed_output_213_cast_fp16, y = const_91_to_fp16)[name = string("normed_output_215_cast_fp16")]; + string normed_output_215_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_215_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2870 = const()[name = string("op_2870"), val = fp32(-0x1p-1)]; + fp32 var_2871 = const()[name = string("op_2871"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_2872 = const()[name = string("op_2872"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_215_cast_fp16_to_fp32 = cast(dtype = normed_output_215_cast_fp16_to_fp32_dtype_0, x = normed_output_215_cast_fp16)[name = string("cast_269")]; + tensor clip_168 = clip(alpha = var_2872, beta = var_2871, x = normed_output_215_cast_fp16_to_fp32)[name = string("clip_168")]; + fp32 var_2866_promoted = const()[name = string("op_2866_promoted"), val = fp32(0x1p+1)]; + tensor var_2880 = pow(x = clip_168, y = var_2866_promoted)[name = string("op_2880")]; + tensor var_2882_axes_0 = const()[name = string("op_2882_axes_0"), val = tensor([-1])]; + bool var_2882_keep_dims_0 = const()[name = string("op_2882_keep_dims_0"), val = bool(true)]; + tensor var_2882 = reduce_mean(axes = var_2882_axes_0, keep_dims = var_2882_keep_dims_0, x = var_2880)[name = string("op_2882")]; + string var_2882_to_fp16_dtype_0 = const()[name = string("op_2882_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2883_to_fp16 = const()[name = string("op_2883_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2882_to_fp16 = cast(dtype = var_2882_to_fp16_dtype_0, x = var_2882)[name = string("cast_268")]; + tensor mean_squared_109_cast_fp16 = add(x = var_2882_to_fp16, y = var_2883_to_fp16)[name = string("mean_squared_109_cast_fp16")]; + string mean_squared_109_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_109_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_109_cast_fp16_to_fp32 = cast(dtype = mean_squared_109_cast_fp16_to_fp32_dtype_0, x = mean_squared_109_cast_fp16)[name 
= string("cast_267")]; + tensor var_2885 = pow(x = mean_squared_109_cast_fp16_to_fp32, y = var_2870)[name = string("op_2885")]; + string clip_168_to_fp16_dtype_0 = const()[name = string("clip_168_to_fp16_dtype_0"), val = string("fp16")]; + string var_2885_to_fp16_dtype_0 = const()[name = string("op_2885_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_168_to_fp16 = cast(dtype = clip_168_to_fp16_dtype_0, x = clip_168)[name = string("cast_265")]; + tensor var_2885_to_fp16 = cast(dtype = var_2885_to_fp16_dtype_0, x = var_2885)[name = string("cast_266")]; + tensor normed_output_217_cast_fp16 = mul(x = clip_168_to_fp16, y = var_2885_to_fp16)[name = string("normed_output_217_cast_fp16")]; + tensor const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73324480)))]; + tensor normed_output_219_cast_fp16 = mul(x = normed_output_217_cast_fp16, y = const_92_to_fp16)[name = string("normed_output_219_cast_fp16")]; + fp16 feed_forward1s_6_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.84p+3)]; + fp16 feed_forward1s_6_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.82p+3)]; + tensor clip_169_cast_fp16 = clip(alpha = feed_forward1s_6_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_6_ffw_layer_1_input_max_to_fp16, x = normed_output_219_cast_fp16)[name = string("clip_169_cast_fp16")]; + tensor feed_forward1s_6_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75423808))))[name = string("feed_forward1s_6_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
feed_forward1s_6_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_169_cast_fp16)[name = string("linear_67_cast_fp16")]; + fp16 feed_forward1s_6_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.9cp+4)]; + fp16 feed_forward1s_6_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.98p+4)]; + tensor clip_170_cast_fp16 = clip(alpha = feed_forward1s_6_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_6_ffw_layer_1_output_max_to_fp16, x = linear_67_cast_fp16)[name = string("clip_170_cast_fp16")]; + tensor hidden_states_633_cast_fp16 = silu(x = clip_170_cast_fp16)[name = string("hidden_states_633_cast_fp16")]; + fp16 feed_forward1s_6_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1p+3)]; + fp16 feed_forward1s_6_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.fcp+2)]; + tensor clip_171_cast_fp16 = clip(alpha = feed_forward1s_6_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_6_ffw_layer_2_input_max_to_fp16, x = hidden_states_633_cast_fp16)[name = string("clip_171_cast_fp16")]; + tensor feed_forward1s_6_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75427968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77525184))))[name = string("feed_forward1s_6_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_68_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_6_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_171_cast_fp16)[name = string("linear_68_cast_fp16")]; + fp16 feed_forward1s_6_ffw_layer_2_output_min_to_fp16 = const()[name = 
string("feed_forward1s_6_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.d8p+4)]; + fp16 feed_forward1s_6_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_6_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.d4p+4)]; + tensor clip_172_cast_fp16 = clip(alpha = feed_forward1s_6_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_6_ffw_layer_2_output_max_to_fp16, x = linear_68_cast_fp16)[name = string("clip_172_cast_fp16")]; + string clip_172_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_172_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_172_cast_fp16_to_fp32 = cast(dtype = clip_172_cast_fp16_to_fp32_dtype_0, x = clip_172_cast_fp16)[name = string("cast_264")]; + tensor clip_173 = clip(alpha = var_2872, beta = var_2871, x = clip_172_cast_fp16_to_fp32)[name = string("clip_173")]; + fp32 var_2866_promoted_1 = const()[name = string("op_2866_promoted_1"), val = fp32(0x1p+1)]; + tensor var_2912 = pow(x = clip_173, y = var_2866_promoted_1)[name = string("op_2912")]; + tensor var_2914_axes_0 = const()[name = string("op_2914_axes_0"), val = tensor([-1])]; + bool var_2914_keep_dims_0 = const()[name = string("op_2914_keep_dims_0"), val = bool(true)]; + tensor var_2914 = reduce_mean(axes = var_2914_axes_0, keep_dims = var_2914_keep_dims_0, x = var_2912)[name = string("op_2914")]; + string var_2914_to_fp16_dtype_0 = const()[name = string("op_2914_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2915_to_fp16 = const()[name = string("op_2915_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2914_to_fp16 = cast(dtype = var_2914_to_fp16_dtype_0, x = var_2914)[name = string("cast_263")]; + tensor mean_squared_111_cast_fp16 = add(x = var_2914_to_fp16, y = var_2915_to_fp16)[name = string("mean_squared_111_cast_fp16")]; + string mean_squared_111_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_111_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_111_cast_fp16_to_fp32 = cast(dtype = 
mean_squared_111_cast_fp16_to_fp32_dtype_0, x = mean_squared_111_cast_fp16)[name = string("cast_262")]; + tensor var_2917 = pow(x = mean_squared_111_cast_fp16_to_fp32, y = var_2870)[name = string("op_2917")]; + string clip_173_to_fp16_dtype_0 = const()[name = string("clip_173_to_fp16_dtype_0"), val = string("fp16")]; + string var_2917_to_fp16_dtype_0 = const()[name = string("op_2917_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_173_to_fp16 = cast(dtype = clip_173_to_fp16_dtype_0, x = clip_173)[name = string("cast_260")]; + tensor var_2917_to_fp16 = cast(dtype = var_2917_to_fp16_dtype_0, x = var_2917)[name = string("cast_261")]; + tensor normed_output_221_cast_fp16 = mul(x = clip_173_to_fp16, y = var_2917_to_fp16)[name = string("normed_output_221_cast_fp16")]; + tensor const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77526272)))]; + tensor normed_output_223_cast_fp16 = mul(x = normed_output_221_cast_fp16, y = const_93_to_fp16)[name = string("normed_output_223_cast_fp16")]; + fp16 var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_645_cast_fp16 = mul(x = normed_output_223_cast_fp16, y = var_2862_to_fp16)[name = string("hidden_states_645_cast_fp16")]; + tensor hidden_states_647_cast_fp16 = add(x = hidden_states_645_cast_fp16, y = normed_output_215_cast_fp16)[name = string("hidden_states_647_cast_fp16")]; + fp16 var_2924_to_fp16 = const()[name = string("op_2924_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_2925_to_fp16 = const()[name = string("op_2925_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_174_cast_fp16 = clip(alpha = var_2924_to_fp16, beta = var_2925_to_fp16, x = hidden_states_647_cast_fp16)[name = string("clip_174_cast_fp16")]; + string clip_174_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_174_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_2927 = const()[name = 
string("op_2927"), val = fp32(-0x1p-1)]; + fp32 var_2931_promoted = const()[name = string("op_2931_promoted"), val = fp32(0x1p+1)]; + tensor clip_174_cast_fp16_to_fp32 = cast(dtype = clip_174_cast_fp16_to_fp32_dtype_0, x = clip_174_cast_fp16)[name = string("cast_259")]; + tensor var_2937 = pow(x = clip_174_cast_fp16_to_fp32, y = var_2931_promoted)[name = string("op_2937")]; + tensor var_2939_axes_0 = const()[name = string("op_2939_axes_0"), val = tensor([-1])]; + bool var_2939_keep_dims_0 = const()[name = string("op_2939_keep_dims_0"), val = bool(true)]; + tensor var_2939 = reduce_mean(axes = var_2939_axes_0, keep_dims = var_2939_keep_dims_0, x = var_2937)[name = string("op_2939")]; + string var_2939_to_fp16_dtype_0 = const()[name = string("op_2939_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_2940_to_fp16 = const()[name = string("op_2940_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_2939_to_fp16 = cast(dtype = var_2939_to_fp16_dtype_0, x = var_2939)[name = string("cast_258")]; + tensor mean_squared_113_cast_fp16 = add(x = var_2939_to_fp16, y = var_2940_to_fp16)[name = string("mean_squared_113_cast_fp16")]; + string mean_squared_113_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_113_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_113_cast_fp16_to_fp32 = cast(dtype = mean_squared_113_cast_fp16_to_fp32_dtype_0, x = mean_squared_113_cast_fp16)[name = string("cast_257")]; + tensor var_2942 = pow(x = mean_squared_113_cast_fp16_to_fp32, y = var_2927)[name = string("op_2942")]; + string var_2942_to_fp16_dtype_0 = const()[name = string("op_2942_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_2942_to_fp16 = cast(dtype = var_2942_to_fp16_dtype_0, x = var_2942)[name = string("cast_256")]; + tensor normed_output_225_cast_fp16 = mul(x = clip_174_cast_fp16, y = var_2942_to_fp16)[name = string("normed_output_225_cast_fp16")]; + tensor const_94_to_fp16 = const()[name = string("const_94_to_fp16"), val = tensor(BLOBFILE(path 
= string("@model_path/weights/weight.bin"), offset = uint64(77528384)))]; + tensor normed_output_227_cast_fp16 = mul(x = normed_output_225_cast_fp16, y = const_94_to_fp16)[name = string("normed_output_227_cast_fp16")]; + int32 var_2948 = const()[name = string("op_2948"), val = int32(-1)]; + fp32 var_2949 = const()[name = string("op_2949"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_6_q_proj_input_min_to_fp16 = const()[name = string("self_attns_6_q_proj_input_min_to_fp16"), val = fp16(-0x1.4ap+3)]; + fp16 self_attns_6_q_proj_input_max_to_fp16 = const()[name = string("self_attns_6_q_proj_input_max_to_fp16"), val = fp16(0x1.48p+3)]; + tensor clip_175_cast_fp16 = clip(alpha = self_attns_6_q_proj_input_min_to_fp16, beta = self_attns_6_q_proj_input_max_to_fp16, x = normed_output_227_cast_fp16)[name = string("clip_175_cast_fp16")]; + tensor self_attns_6_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77530496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78054848))))[name = string("self_attns_6_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_69_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_6_q_proj_linear_weight_to_fp16_palettized, x = clip_175_cast_fp16)[name = string("linear_69_cast_fp16")]; + fp16 self_attns_6_q_proj_output_min_to_fp16 = const()[name = string("self_attns_6_q_proj_output_min_to_fp16"), val = fp16(-0x1.fp+3)]; + fp16 self_attns_6_q_proj_output_max_to_fp16 = const()[name = string("self_attns_6_q_proj_output_max_to_fp16"), val = fp16(0x1.ecp+3)]; + tensor clip_176_cast_fp16 = clip(alpha = self_attns_6_q_proj_output_min_to_fp16, beta = self_attns_6_q_proj_output_max_to_fp16, x = linear_69_cast_fp16)[name = string("clip_176_cast_fp16")]; + tensor var_2993 = const()[name = string("op_2993"), val = tensor([1, 50, 8, 128])]; + tensor q_13_cast_fp16 = reshape(shape = 
var_2993, x = clip_176_cast_fp16)[name = string("q_13_cast_fp16")]; + tensor self_attns_6_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78055936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78580288))))[name = string("self_attns_6_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_6_k_proj_linear_weight_to_fp16_palettized, x = clip_175_cast_fp16)[name = string("linear_70_cast_fp16")]; + fp16 self_attns_6_k_proj_output_min_to_fp16 = const()[name = string("self_attns_6_k_proj_output_min_to_fp16"), val = fp16(-0x1.fp+3)]; + fp16 self_attns_6_k_proj_output_max_to_fp16 = const()[name = string("self_attns_6_k_proj_output_max_to_fp16"), val = fp16(0x1.ecp+3)]; + tensor clip_178_cast_fp16 = clip(alpha = self_attns_6_k_proj_output_min_to_fp16, beta = self_attns_6_k_proj_output_max_to_fp16, x = linear_70_cast_fp16)[name = string("clip_178_cast_fp16")]; + tensor var_3005 = const()[name = string("op_3005"), val = tensor([1, 50, 8, 128])]; + tensor k_13_cast_fp16 = reshape(shape = var_3005, x = clip_178_cast_fp16)[name = string("k_13_cast_fp16")]; + tensor self_attns_6_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78581376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79105728))))[name = string("self_attns_6_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_71_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_6_v_proj_linear_weight_to_fp16_palettized, x = clip_175_cast_fp16)[name = string("linear_71_cast_fp16")]; + fp16 self_attns_6_v_proj_output_min_to_fp16 = const()[name = string("self_attns_6_v_proj_output_min_to_fp16"), val = fp16(-0x1.fp+3)]; + fp16 
self_attns_6_v_proj_output_max_to_fp16 = const()[name = string("self_attns_6_v_proj_output_max_to_fp16"), val = fp16(0x1.ecp+3)]; + tensor clip_180_cast_fp16 = clip(alpha = self_attns_6_v_proj_output_min_to_fp16, beta = self_attns_6_v_proj_output_max_to_fp16, x = linear_71_cast_fp16)[name = string("clip_180_cast_fp16")]; + tensor var_3017 = const()[name = string("op_3017"), val = tensor([1, 50, 8, 128])]; + tensor input_285_cast_fp16 = reshape(shape = var_3017, x = clip_180_cast_fp16)[name = string("input_285_cast_fp16")]; + fp16 var_3019_to_fp16 = const()[name = string("op_3019_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_3020_cast_fp16 = mul(x = q_13_cast_fp16, y = var_3019_to_fp16)[name = string("op_3020_cast_fp16")]; + tensor var_3021_to_fp16 = const()[name = string("op_3021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79106816)))]; + tensor input_281_cast_fp16 = mul(x = var_3020_cast_fp16, y = var_3021_to_fp16)[name = string("input_281_cast_fp16")]; + fp16 var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_283_cast_fp16 = mul(x = k_13_cast_fp16, y = var_3023_to_fp16)[name = string("input_283_cast_fp16")]; + tensor q_padded_13_pad_0 = const()[name = string("q_padded_13_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_13_mode_0 = const()[name = string("q_padded_13_mode_0"), val = string("constant")]; + fp16 const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_13_cast_fp16 = pad(constant_val = const_95_to_fp16, mode = q_padded_13_mode_0, pad = q_padded_13_pad_0, x = input_281_cast_fp16)[name = string("q_padded_13_cast_fp16")]; + tensor var_3027 = const()[name = string("op_3027"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_13_cast_fp16 = reshape(shape = var_3027, x = q_padded_13_cast_fp16)[name = string("q_blocks_13_cast_fp16")]; + tensor k_padded_13_pad_0 = const()[name = 
string("k_padded_13_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_13_mode_0 = const()[name = string("k_padded_13_mode_0"), val = string("constant")]; + fp16 const_96_to_fp16 = const()[name = string("const_96_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_13_cast_fp16 = pad(constant_val = const_96_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_283_cast_fp16)[name = string("k_padded_13_cast_fp16")]; + tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; + fp16 const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_13_cast_fp16 = pad(constant_val = const_97_to_fp16, mode = v_padded_13_mode_0, pad = v_padded_13_pad_0, x = input_285_cast_fp16)[name = string("v_padded_13_cast_fp16")]; + tensor var_3034_begin_0 = const()[name = string("op_3034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3034_end_0 = const()[name = string("op_3034_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_3034_end_mask_0 = const()[name = string("op_3034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3034_cast_fp16 = slice_by_index(begin = var_3034_begin_0, end = var_3034_end_0, end_mask = var_3034_end_mask_0, x = k_padded_13_cast_fp16)[name = string("op_3034_cast_fp16")]; + tensor var_3036_begin_0 = const()[name = string("op_3036_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_3036_end_0 = const()[name = string("op_3036_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_3036_end_mask_0 = const()[name = string("op_3036_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3036_cast_fp16 = slice_by_index(begin = var_3036_begin_0, end = var_3036_end_0, end_mask = var_3036_end_mask_0, x = k_padded_13_cast_fp16)[name = string("op_3036_cast_fp16")]; + tensor var_3038_begin_0 = const()[name = 
string("op_3038_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_3038_end_0 = const()[name = string("op_3038_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_3038_end_mask_0 = const()[name = string("op_3038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3038_cast_fp16 = slice_by_index(begin = var_3038_begin_0, end = var_3038_end_0, end_mask = var_3038_end_mask_0, x = k_padded_13_cast_fp16)[name = string("op_3038_cast_fp16")]; + tensor var_3040_begin_0 = const()[name = string("op_3040_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_3040_end_0 = const()[name = string("op_3040_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_3040_end_mask_0 = const()[name = string("op_3040_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3040_cast_fp16 = slice_by_index(begin = var_3040_begin_0, end = var_3040_end_0, end_mask = var_3040_end_mask_0, x = k_padded_13_cast_fp16)[name = string("op_3040_cast_fp16")]; + tensor var_3042_begin_0 = const()[name = string("op_3042_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_3042_end_0 = const()[name = string("op_3042_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_3042_end_mask_0 = const()[name = string("op_3042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3042_cast_fp16 = slice_by_index(begin = var_3042_begin_0, end = var_3042_end_0, end_mask = var_3042_end_mask_0, x = k_padded_13_cast_fp16)[name = string("op_3042_cast_fp16")]; + int32 k_blocks_13_axis_0 = const()[name = string("k_blocks_13_axis_0"), val = int32(1)]; + tensor k_blocks_13_cast_fp16 = stack(axis = k_blocks_13_axis_0, values = (var_3034_cast_fp16, var_3036_cast_fp16, var_3038_cast_fp16, var_3040_cast_fp16, var_3042_cast_fp16))[name = string("k_blocks_13_cast_fp16")]; + tensor var_3046_begin_0 = const()[name = string("op_3046_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3046_end_0 = const()[name = string("op_3046_end_0"), val = tensor([1, 24, 8, 128])]; + tensor 
var_3046_end_mask_0 = const()[name = string("op_3046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3046_cast_fp16 = slice_by_index(begin = var_3046_begin_0, end = var_3046_end_0, end_mask = var_3046_end_mask_0, x = v_padded_13_cast_fp16)[name = string("op_3046_cast_fp16")]; + tensor var_3048_begin_0 = const()[name = string("op_3048_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_3048_end_0 = const()[name = string("op_3048_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_3048_end_mask_0 = const()[name = string("op_3048_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3048_cast_fp16 = slice_by_index(begin = var_3048_begin_0, end = var_3048_end_0, end_mask = var_3048_end_mask_0, x = v_padded_13_cast_fp16)[name = string("op_3048_cast_fp16")]; + tensor var_3050_begin_0 = const()[name = string("op_3050_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_3050_end_0 = const()[name = string("op_3050_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_3050_end_mask_0 = const()[name = string("op_3050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3050_cast_fp16 = slice_by_index(begin = var_3050_begin_0, end = var_3050_end_0, end_mask = var_3050_end_mask_0, x = v_padded_13_cast_fp16)[name = string("op_3050_cast_fp16")]; + tensor var_3052_begin_0 = const()[name = string("op_3052_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_3052_end_0 = const()[name = string("op_3052_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_3052_end_mask_0 = const()[name = string("op_3052_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3052_cast_fp16 = slice_by_index(begin = var_3052_begin_0, end = var_3052_end_0, end_mask = var_3052_end_mask_0, x = v_padded_13_cast_fp16)[name = string("op_3052_cast_fp16")]; + tensor var_3054_begin_0 = const()[name = string("op_3054_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_3054_end_0 = const()[name = string("op_3054_end_0"), val = tensor([1, 72, 
8, 128])]; + tensor var_3054_end_mask_0 = const()[name = string("op_3054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3054_cast_fp16 = slice_by_index(begin = var_3054_begin_0, end = var_3054_end_0, end_mask = var_3054_end_mask_0, x = v_padded_13_cast_fp16)[name = string("op_3054_cast_fp16")]; + int32 v_blocks_13_axis_0 = const()[name = string("v_blocks_13_axis_0"), val = int32(1)]; + tensor v_blocks_13_cast_fp16 = stack(axis = v_blocks_13_axis_0, values = (var_3046_cast_fp16, var_3048_cast_fp16, var_3050_cast_fp16, var_3052_cast_fp16, var_3054_cast_fp16))[name = string("v_blocks_13_cast_fp16")]; + tensor var_3062 = const()[name = string("op_3062"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_3064 = const()[name = string("op_3064"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_13_transpose_x_0 = const()[name = string("matrix_ac_13_transpose_x_0"), val = bool(false)]; + bool matrix_ac_13_transpose_y_0 = const()[name = string("matrix_ac_13_transpose_y_0"), val = bool(false)]; + tensor queries_13_cast_fp16 = transpose(perm = var_3062, x = q_blocks_13_cast_fp16)[name = string("transpose_34")]; + tensor keys_t_13_cast_fp16 = transpose(perm = var_3064, x = k_blocks_13_cast_fp16)[name = string("transpose_35")]; + tensor matrix_ac_13_cast_fp16 = matmul(transpose_x = matrix_ac_13_transpose_x_0, transpose_y = matrix_ac_13_transpose_y_0, x = queries_13_cast_fp16, y = keys_t_13_cast_fp16)[name = string("matrix_ac_13_cast_fp16")]; + tensor var_3067 = const()[name = string("op_3067"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_13_cast_fp16 = reshape(shape = var_3067, x = queries_13_cast_fp16)[name = string("q_flat_13_cast_fp16")]; + bool matrix_bd_61_transpose_x_0 = const()[name = string("matrix_bd_61_transpose_x_0"), val = bool(false)]; + bool matrix_bd_61_transpose_y_0 = const()[name = string("matrix_bd_61_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_13_to_fp16 = const()[name = string("rel_k_t_13_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79107136)))]; + tensor matrix_bd_61_cast_fp16 = matmul(transpose_x = matrix_bd_61_transpose_x_0, transpose_y = matrix_bd_61_transpose_y_0, x = q_flat_13_cast_fp16, y = rel_k_t_13_to_fp16)[name = string("matrix_bd_61_cast_fp16")]; + tensor var_3072 = const()[name = string("op_3072"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_287_cast_fp16 = reshape(shape = var_3072, x = matrix_bd_61_cast_fp16)[name = string("input_287_cast_fp16")]; + tensor matrix_bd_63_pad_0 = const()[name = string("matrix_bd_63_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79133824)))]; + string matrix_bd_63_mode_0 = const()[name = string("matrix_bd_63_mode_0"), val = string("constant")]; + fp16 const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_63_cast_fp16 = pad(constant_val = const_99_to_fp16, mode = matrix_bd_63_mode_0, pad = matrix_bd_63_pad_0, x = input_287_cast_fp16)[name = string("matrix_bd_63_cast_fp16")]; + tensor var_3076 = const()[name = string("op_3076"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_65_cast_fp16 = reshape(shape = var_3076, x = matrix_bd_63_cast_fp16)[name = string("matrix_bd_65_cast_fp16")]; + tensor matrix_bd_67_begin_0 = const()[name = string("matrix_bd_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_67_end_0 = const()[name = string("matrix_bd_67_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_67_end_mask_0 = const()[name = string("matrix_bd_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_67_cast_fp16 = slice_by_index(begin = matrix_bd_67_begin_0, end = matrix_bd_67_end_0, end_mask = matrix_bd_67_end_mask_0, x = matrix_bd_65_cast_fp16)[name = string("matrix_bd_67_cast_fp16")]; + tensor var_3082 = const()[name = string("op_3082"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_69_cast_fp16 = reshape(shape = var_3082, 
x = matrix_bd_67_cast_fp16)[name = string("matrix_bd_69_cast_fp16")]; + tensor attn_37_cast_fp16 = add(x = matrix_ac_13_cast_fp16, y = matrix_bd_69_cast_fp16)[name = string("attn_37_cast_fp16")]; + fp16 _inversed_3085_y_0_to_fp16 = const()[name = string("_inversed_3085_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_3085_cast_fp16 = mul(x = attn_37_cast_fp16, y = _inversed_3085_y_0_to_fp16)[name = string("_inversed_3085_cast_fp16")]; + string _inversed_3085_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_3085_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_3085_cast_fp16_to_fp32 = cast(dtype = _inversed_3085_cast_fp16_to_fp32_dtype_0, x = _inversed_3085_cast_fp16)[name = string("cast_255")]; + tensor var_3086 = tanh(x = _inversed_3085_cast_fp16_to_fp32)[name = string("op_3086")]; + string var_3086_to_fp16_dtype_0 = const()[name = string("op_3086_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_6_softcap_to_fp16 = const()[name = string("self_attns_6_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_3086_to_fp16 = cast(dtype = var_3086_to_fp16_dtype_0, x = var_3086)[name = string("cast_254")]; + tensor attn_39_cast_fp16 = mul(x = var_3086_to_fp16, y = self_attns_6_softcap_to_fp16)[name = string("attn_39_cast_fp16")]; + string attn_39_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_39_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_39_cast_fp16_to_fp32 = cast(dtype = attn_39_cast_fp16_to_fp32_dtype_0, x = attn_39_cast_fp16)[name = string("cast_253")]; + tensor input_289 = select(a = var_2949, b = attn_39_cast_fp16_to_fp32, cond = var_460)[name = string("input_289")]; + tensor var_3090 = softmax(axis = var_2948, x = input_289)[name = string("op_3090")]; + tensor var_3092 = const()[name = string("op_3092"), val = tensor([0, 3, 1, -3, -1])]; + bool out_37_transpose_x_0 = const()[name = string("out_37_transpose_x_0"), val = bool(false)]; + bool out_37_transpose_y_0 = const()[name = 
string("out_37_transpose_y_0"), val = bool(false)]; + string var_3090_to_fp16_dtype_0 = const()[name = string("op_3090_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_13_cast_fp16 = transpose(perm = var_3092, x = v_blocks_13_cast_fp16)[name = string("transpose_33")]; + tensor var_3090_to_fp16 = cast(dtype = var_3090_to_fp16_dtype_0, x = var_3090)[name = string("cast_252")]; + tensor out_37_cast_fp16 = matmul(transpose_x = out_37_transpose_x_0, transpose_y = out_37_transpose_y_0, x = var_3090_to_fp16, y = values_t_13_cast_fp16)[name = string("out_37_cast_fp16")]; + tensor var_3095 = const()[name = string("op_3095"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_3097 = const()[name = string("op_3097"), val = tensor([1, 60, 1024])]; + tensor var_3096_cast_fp16 = transpose(perm = var_3095, x = out_37_cast_fp16)[name = string("transpose_32")]; + tensor out_39_cast_fp16 = reshape(shape = var_3097, x = var_3096_cast_fp16)[name = string("out_39_cast_fp16")]; + tensor var_3100_begin_0 = const()[name = string("op_3100_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3100_end_0 = const()[name = string("op_3100_end_0"), val = tensor([1, 50, 1024])]; + tensor var_3100_end_mask_0 = const()[name = string("op_3100_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3100_cast_fp16 = slice_by_index(begin = var_3100_begin_0, end = var_3100_end_0, end_mask = var_3100_end_mask_0, x = out_39_cast_fp16)[name = string("op_3100_cast_fp16")]; + fp16 self_attns_6_post_input_min_to_fp16 = const()[name = string("self_attns_6_post_input_min_to_fp16"), val = fp16(-0x1.e2p+3)]; + fp16 self_attns_6_post_input_max_to_fp16 = const()[name = string("self_attns_6_post_input_max_to_fp16"), val = fp16(0x1.dep+3)]; + tensor clip_181_cast_fp16 = clip(alpha = self_attns_6_post_input_min_to_fp16, beta = self_attns_6_post_input_max_to_fp16, x = var_3100_cast_fp16)[name = string("clip_181_cast_fp16")]; + tensor self_attns_6_post_linear_weight_to_fp16_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79133952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79658304))))[name = string("self_attns_6_post_linear_weight_to_fp16_palettized")]; + tensor linear_73_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_6_post_linear_weight_to_fp16_palettized, x = clip_181_cast_fp16)[name = string("linear_73_cast_fp16")]; + fp16 self_attns_6_post_output_min_to_fp16 = const()[name = string("self_attns_6_post_output_min_to_fp16"), val = fp16(-0x1.88p+5)]; + fp16 self_attns_6_post_output_max_to_fp16 = const()[name = string("self_attns_6_post_output_max_to_fp16"), val = fp16(0x1.84p+5)]; + tensor clip_182_cast_fp16 = clip(alpha = self_attns_6_post_output_min_to_fp16, beta = self_attns_6_post_output_max_to_fp16, x = linear_73_cast_fp16)[name = string("clip_182_cast_fp16")]; + fp16 var_3112_to_fp16 = const()[name = string("op_3112_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3113_to_fp16 = const()[name = string("op_3113_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_183_cast_fp16 = clip(alpha = var_3112_to_fp16, beta = var_3113_to_fp16, x = clip_182_cast_fp16)[name = string("clip_183_cast_fp16")]; + string clip_183_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_183_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3115 = const()[name = string("op_3115"), val = fp32(-0x1p-1)]; + fp32 var_3119_promoted = const()[name = string("op_3119_promoted"), val = fp32(0x1p+1)]; + tensor clip_183_cast_fp16_to_fp32 = cast(dtype = clip_183_cast_fp16_to_fp32_dtype_0, x = clip_183_cast_fp16)[name = string("cast_251")]; + tensor var_3125 = pow(x = clip_183_cast_fp16_to_fp32, y = var_3119_promoted)[name = string("op_3125")]; + tensor var_3127_axes_0 = const()[name = string("op_3127_axes_0"), val = tensor([-1])]; + bool var_3127_keep_dims_0 = const()[name = string("op_3127_keep_dims_0"), val = 
bool(true)]; + tensor var_3127 = reduce_mean(axes = var_3127_axes_0, keep_dims = var_3127_keep_dims_0, x = var_3125)[name = string("op_3127")]; + string var_3127_to_fp16_dtype_0 = const()[name = string("op_3127_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3128_to_fp16 = const()[name = string("op_3128_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3127_to_fp16 = cast(dtype = var_3127_to_fp16_dtype_0, x = var_3127)[name = string("cast_250")]; + tensor mean_squared_115_cast_fp16 = add(x = var_3127_to_fp16, y = var_3128_to_fp16)[name = string("mean_squared_115_cast_fp16")]; + string mean_squared_115_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_115_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_115_cast_fp16_to_fp32 = cast(dtype = mean_squared_115_cast_fp16_to_fp32_dtype_0, x = mean_squared_115_cast_fp16)[name = string("cast_249")]; + tensor var_3130 = pow(x = mean_squared_115_cast_fp16_to_fp32, y = var_3115)[name = string("op_3130")]; + string var_3130_to_fp16_dtype_0 = const()[name = string("op_3130_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3130_to_fp16 = cast(dtype = var_3130_to_fp16_dtype_0, x = var_3130)[name = string("cast_248")]; + tensor normed_output_229_cast_fp16 = mul(x = clip_183_cast_fp16, y = var_3130_to_fp16)[name = string("normed_output_229_cast_fp16")]; + tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79659392)))]; + tensor normed_output_231_cast_fp16 = mul(x = normed_output_229_cast_fp16, y = const_100_to_fp16)[name = string("normed_output_231_cast_fp16")]; + tensor hidden_states_673_cast_fp16 = add(x = normed_output_231_cast_fp16, y = hidden_states_647_cast_fp16)[name = string("hidden_states_673_cast_fp16")]; + string hidden_states_673_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_673_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3137 = 
const()[name = string("op_3137"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_3138 = const()[name = string("op_3138"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_3150 = const()[name = string("op_3150"), val = fp32(-0x1p-1)]; + fp32 var_3146_promoted = const()[name = string("op_3146_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_673_cast_fp16_to_fp32 = cast(dtype = hidden_states_673_cast_fp16_to_fp32_dtype_0, x = hidden_states_673_cast_fp16)[name = string("cast_247")]; + tensor var_3158 = pow(x = hidden_states_673_cast_fp16_to_fp32, y = var_3146_promoted)[name = string("op_3158")]; + tensor var_3160_axes_0 = const()[name = string("op_3160_axes_0"), val = tensor([-1])]; + bool var_3160_keep_dims_0 = const()[name = string("op_3160_keep_dims_0"), val = bool(true)]; + tensor var_3160 = reduce_mean(axes = var_3160_axes_0, keep_dims = var_3160_keep_dims_0, x = var_3158)[name = string("op_3160")]; + string var_3160_to_fp16_dtype_0 = const()[name = string("op_3160_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3161_to_fp16 = const()[name = string("op_3161_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3160_to_fp16 = cast(dtype = var_3160_to_fp16_dtype_0, x = var_3160)[name = string("cast_246")]; + tensor mean_squared_117_cast_fp16 = add(x = var_3160_to_fp16, y = var_3161_to_fp16)[name = string("mean_squared_117_cast_fp16")]; + string mean_squared_117_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_117_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_117_cast_fp16_to_fp32 = cast(dtype = mean_squared_117_cast_fp16_to_fp32_dtype_0, x = mean_squared_117_cast_fp16)[name = string("cast_245")]; + tensor var_3163 = pow(x = mean_squared_117_cast_fp16_to_fp32, y = var_3150)[name = string("op_3163")]; + string var_3163_to_fp16_dtype_0 = const()[name = string("op_3163_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3163_to_fp16 = cast(dtype = var_3163_to_fp16_dtype_0, x = var_3163)[name = string("cast_244")]; + tensor 
normed_output_233_cast_fp16 = mul(x = hidden_states_673_cast_fp16, y = var_3163_to_fp16)[name = string("normed_output_233_cast_fp16")]; + tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79661504)))]; + tensor normed_output_235_cast_fp16 = mul(x = normed_output_233_cast_fp16, y = const_101_to_fp16)[name = string("normed_output_235_cast_fp16")]; + fp16 lconv1ds_6_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_6_linear_start_input_min_to_fp16"), val = fp16(-0x1.5ep+3)]; + fp16 lconv1ds_6_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_6_linear_start_input_max_to_fp16"), val = fp16(0x1.5cp+3)]; + tensor clip_184_cast_fp16 = clip(alpha = lconv1ds_6_linear_start_input_min_to_fp16, beta = lconv1ds_6_linear_start_input_max_to_fp16, x = normed_output_235_cast_fp16)[name = string("clip_184_cast_fp16")]; + tensor lconv1ds_6_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79663616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80712256))))[name = string("lconv1ds_6_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_74_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_6_linear_start_linear_weight_to_fp16_palettized, x = clip_184_cast_fp16)[name = string("linear_74_cast_fp16")]; + fp16 lconv1ds_6_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_6_linear_start_output_min_to_fp16"), val = fp16(-0x1.6ap+4)]; + fp16 lconv1ds_6_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_6_linear_start_output_max_to_fp16"), val = fp16(0x1.66p+4)]; + tensor clip_185_cast_fp16 = clip(alpha = lconv1ds_6_linear_start_output_min_to_fp16, beta = lconv1ds_6_linear_start_output_max_to_fp16, x = linear_74_cast_fp16)[name = 
string("clip_185_cast_fp16")]; + int32 hidden_states_681_split_num_splits_0 = const()[name = string("hidden_states_681_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_681_split_axis_0 = const()[name = string("hidden_states_681_split_axis_0"), val = int32(-1)]; + tensor hidden_states_681_split_cast_fp16_0, tensor hidden_states_681_split_cast_fp16_1 = split(axis = hidden_states_681_split_axis_0, num_splits = hidden_states_681_split_num_splits_0, x = clip_185_cast_fp16)[name = string("hidden_states_681_split_cast_fp16")]; + tensor hidden_states_681_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_681_split_cast_fp16_1)[name = string("hidden_states_681_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_681_cast_fp16 = mul(x = hidden_states_681_split_cast_fp16_0, y = hidden_states_681_split_1_sigmoid_cast_fp16)[name = string("hidden_states_681_cast_fp16")]; + tensor input_297_perm_0 = const()[name = string("input_297_perm_0"), val = tensor([0, 2, 1])]; + tensor input_299_pad_0 = const()[name = string("input_299_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_299_mode_0 = const()[name = string("input_299_mode_0"), val = string("constant")]; + fp16 const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = fp16(0x0p+0)]; + tensor input_297_cast_fp16 = transpose(perm = input_297_perm_0, x = hidden_states_681_cast_fp16)[name = string("transpose_31")]; + tensor input_299_cast_fp16 = pad(constant_val = const_102_to_fp16, mode = input_299_mode_0, pad = input_299_pad_0, x = input_297_cast_fp16)[name = string("input_299_cast_fp16")]; + string var_3189_pad_type_0 = const()[name = string("op_3189_pad_type_0"), val = string("valid")]; + int32 var_3189_groups_0 = const()[name = string("op_3189_groups_0"), val = int32(1024)]; + tensor var_3189_strides_0 = const()[name = string("op_3189_strides_0"), val = tensor([1])]; + tensor var_3189_pad_0 = const()[name = string("op_3189_pad_0"), val = tensor([0, 0])]; + tensor var_3189_dilations_0 = 
const()[name = string("op_3189_dilations_0"), val = tensor([1])]; + tensor lconv1ds_6_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80714368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80716992))))[name = string("lconv1ds_6_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_3189_cast_fp16 = conv(dilations = var_3189_dilations_0, groups = var_3189_groups_0, pad = var_3189_pad_0, pad_type = var_3189_pad_type_0, strides = var_3189_strides_0, weight = lconv1ds_6_depthwise_conv1d_weight_to_fp16_palettized, x = input_299_cast_fp16)[name = string("op_3189_cast_fp16")]; + tensor hidden_states_683_perm_0 = const()[name = string("hidden_states_683_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_683_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_683_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_683_cast_fp16 = transpose(perm = hidden_states_683_perm_0, x = var_3189_cast_fp16)[name = string("transpose_30")]; + tensor hidden_states_683_cast_fp16_to_fp32 = cast(dtype = hidden_states_683_cast_fp16_to_fp32_dtype_0, x = hidden_states_683_cast_fp16)[name = string("cast_243")]; + tensor clip_186 = clip(alpha = var_3138, beta = var_3137, x = hidden_states_683_cast_fp16_to_fp32)[name = string("clip_186")]; + fp32 var_3146_promoted_1 = const()[name = string("op_3146_promoted_1"), val = fp32(0x1p+1)]; + tensor var_3194 = pow(x = clip_186, y = var_3146_promoted_1)[name = string("op_3194")]; + tensor var_3196_axes_0 = const()[name = string("op_3196_axes_0"), val = tensor([-1])]; + bool var_3196_keep_dims_0 = const()[name = string("op_3196_keep_dims_0"), val = bool(true)]; + tensor var_3196 = reduce_mean(axes = var_3196_axes_0, keep_dims = var_3196_keep_dims_0, x = var_3194)[name = string("op_3196")]; + string var_3196_to_fp16_dtype_0 = const()[name = 
string("op_3196_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3197_to_fp16 = const()[name = string("op_3197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3196_to_fp16 = cast(dtype = var_3196_to_fp16_dtype_0, x = var_3196)[name = string("cast_242")]; + tensor mean_squared_119_cast_fp16 = add(x = var_3196_to_fp16, y = var_3197_to_fp16)[name = string("mean_squared_119_cast_fp16")]; + string mean_squared_119_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_119_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_119_cast_fp16_to_fp32 = cast(dtype = mean_squared_119_cast_fp16_to_fp32_dtype_0, x = mean_squared_119_cast_fp16)[name = string("cast_241")]; + tensor var_3199 = pow(x = mean_squared_119_cast_fp16_to_fp32, y = var_3150)[name = string("op_3199")]; + string clip_186_to_fp16_dtype_0 = const()[name = string("clip_186_to_fp16_dtype_0"), val = string("fp16")]; + string var_3199_to_fp16_dtype_0 = const()[name = string("op_3199_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_186_to_fp16 = cast(dtype = clip_186_to_fp16_dtype_0, x = clip_186)[name = string("cast_239")]; + tensor var_3199_to_fp16 = cast(dtype = var_3199_to_fp16_dtype_0, x = var_3199)[name = string("cast_240")]; + tensor normed_output_237_cast_fp16 = mul(x = clip_186_to_fp16, y = var_3199_to_fp16)[name = string("normed_output_237_cast_fp16")]; + tensor const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80718080)))]; + tensor normed_output_239_cast_fp16 = mul(x = normed_output_237_cast_fp16, y = const_103_to_fp16)[name = string("normed_output_239_cast_fp16")]; + tensor hidden_states_689_cast_fp16 = silu(x = normed_output_239_cast_fp16)[name = string("hidden_states_689_cast_fp16")]; + fp16 lconv1ds_6_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_6_linear_end_input_min_to_fp16"), val = fp16(-0x1.d2p+3)]; + fp16 
lconv1ds_6_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_6_linear_end_input_max_to_fp16"), val = fp16(0x1.cep+3)]; + tensor clip_187_cast_fp16 = clip(alpha = lconv1ds_6_linear_end_input_min_to_fp16, beta = lconv1ds_6_linear_end_input_max_to_fp16, x = hidden_states_689_cast_fp16)[name = string("clip_187_cast_fp16")]; + tensor lconv1ds_6_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80720192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81244544))))[name = string("lconv1ds_6_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_75_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_6_linear_end_linear_weight_to_fp16_palettized, x = clip_187_cast_fp16)[name = string("linear_75_cast_fp16")]; + fp16 lconv1ds_6_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_6_linear_end_output_min_to_fp16"), val = fp16(-0x1.f8p+2)]; + fp16 lconv1ds_6_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_6_linear_end_output_max_to_fp16"), val = fp16(0x1.f4p+2)]; + tensor clip_188_cast_fp16 = clip(alpha = lconv1ds_6_linear_end_output_min_to_fp16, beta = lconv1ds_6_linear_end_output_max_to_fp16, x = linear_75_cast_fp16)[name = string("clip_188_cast_fp16")]; + tensor hidden_states_695_cast_fp16 = add(x = clip_188_cast_fp16, y = hidden_states_673_cast_fp16)[name = string("hidden_states_695_cast_fp16")]; + string hidden_states_695_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_695_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3223 = const()[name = string("op_3223"), val = fp32(-0x1p-1)]; + fp32 var_3224 = const()[name = string("op_3224"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_3225 = const()[name = string("op_3225"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_695_cast_fp16_to_fp32 = cast(dtype = 
hidden_states_695_cast_fp16_to_fp32_dtype_0, x = hidden_states_695_cast_fp16)[name = string("cast_238")]; + tensor clip_189 = clip(alpha = var_3225, beta = var_3224, x = hidden_states_695_cast_fp16_to_fp32)[name = string("clip_189")]; + fp32 var_3219_promoted = const()[name = string("op_3219_promoted"), val = fp32(0x1p+1)]; + tensor var_3233 = pow(x = clip_189, y = var_3219_promoted)[name = string("op_3233")]; + tensor var_3235_axes_0 = const()[name = string("op_3235_axes_0"), val = tensor([-1])]; + bool var_3235_keep_dims_0 = const()[name = string("op_3235_keep_dims_0"), val = bool(true)]; + tensor var_3235 = reduce_mean(axes = var_3235_axes_0, keep_dims = var_3235_keep_dims_0, x = var_3233)[name = string("op_3235")]; + string var_3235_to_fp16_dtype_0 = const()[name = string("op_3235_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3236_to_fp16 = const()[name = string("op_3236_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3235_to_fp16 = cast(dtype = var_3235_to_fp16_dtype_0, x = var_3235)[name = string("cast_237")]; + tensor mean_squared_121_cast_fp16 = add(x = var_3235_to_fp16, y = var_3236_to_fp16)[name = string("mean_squared_121_cast_fp16")]; + string mean_squared_121_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_121_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_121_cast_fp16_to_fp32 = cast(dtype = mean_squared_121_cast_fp16_to_fp32_dtype_0, x = mean_squared_121_cast_fp16)[name = string("cast_236")]; + tensor var_3238 = pow(x = mean_squared_121_cast_fp16_to_fp32, y = var_3223)[name = string("op_3238")]; + string clip_189_to_fp16_dtype_0 = const()[name = string("clip_189_to_fp16_dtype_0"), val = string("fp16")]; + string var_3238_to_fp16_dtype_0 = const()[name = string("op_3238_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_189_to_fp16 = cast(dtype = clip_189_to_fp16_dtype_0, x = clip_189)[name = string("cast_234")]; + tensor var_3238_to_fp16 = cast(dtype = var_3238_to_fp16_dtype_0, x = var_3238)[name 
= string("cast_235")]; + tensor normed_output_241_cast_fp16 = mul(x = clip_189_to_fp16, y = var_3238_to_fp16)[name = string("normed_output_241_cast_fp16")]; + tensor const_104_to_fp16 = const()[name = string("const_104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81245632)))]; + tensor normed_output_243_cast_fp16 = mul(x = normed_output_241_cast_fp16, y = const_104_to_fp16)[name = string("normed_output_243_cast_fp16")]; + fp16 feed_forward2s_6_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.d2p+3)]; + fp16 feed_forward2s_6_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.cep+3)]; + tensor clip_190_cast_fp16 = clip(alpha = feed_forward2s_6_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_6_ffw_layer_1_input_max_to_fp16, x = normed_output_243_cast_fp16)[name = string("clip_190_cast_fp16")]; + tensor feed_forward2s_6_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81247744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83344960))))[name = string("feed_forward2s_6_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_6_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_190_cast_fp16)[name = string("linear_76_cast_fp16")]; + fp16 feed_forward2s_6_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.0ap+5)]; + fp16 feed_forward2s_6_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.08p+5)]; + tensor clip_191_cast_fp16 = clip(alpha = feed_forward2s_6_ffw_layer_1_output_min_to_fp16, beta = 
feed_forward2s_6_ffw_layer_1_output_max_to_fp16, x = linear_76_cast_fp16)[name = string("clip_191_cast_fp16")]; + tensor hidden_states_705_cast_fp16 = silu(x = clip_191_cast_fp16)[name = string("hidden_states_705_cast_fp16")]; + fp16 feed_forward2s_6_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.6ep+3)]; + fp16 feed_forward2s_6_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.6cp+3)]; + tensor clip_192_cast_fp16 = clip(alpha = feed_forward2s_6_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_6_ffw_layer_2_input_max_to_fp16, x = hidden_states_705_cast_fp16)[name = string("clip_192_cast_fp16")]; + tensor feed_forward2s_6_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83349120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85446336))))[name = string("feed_forward2s_6_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_6_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_192_cast_fp16)[name = string("linear_77_cast_fp16")]; + fp16 feed_forward2s_6_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.34p+6)]; + fp16 feed_forward2s_6_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_6_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.32p+6)]; + tensor clip_193_cast_fp16 = clip(alpha = feed_forward2s_6_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_6_ffw_layer_2_output_max_to_fp16, x = linear_77_cast_fp16)[name = string("clip_193_cast_fp16")]; + string clip_193_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_193_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor 
clip_193_cast_fp16_to_fp32 = cast(dtype = clip_193_cast_fp16_to_fp32_dtype_0, x = clip_193_cast_fp16)[name = string("cast_233")]; + tensor clip_194 = clip(alpha = var_3225, beta = var_3224, x = clip_193_cast_fp16_to_fp32)[name = string("clip_194")]; + fp32 var_3219_promoted_1 = const()[name = string("op_3219_promoted_1"), val = fp32(0x1p+1)]; + tensor var_3265 = pow(x = clip_194, y = var_3219_promoted_1)[name = string("op_3265")]; + tensor var_3267_axes_0 = const()[name = string("op_3267_axes_0"), val = tensor([-1])]; + bool var_3267_keep_dims_0 = const()[name = string("op_3267_keep_dims_0"), val = bool(true)]; + tensor var_3267 = reduce_mean(axes = var_3267_axes_0, keep_dims = var_3267_keep_dims_0, x = var_3265)[name = string("op_3267")]; + string var_3267_to_fp16_dtype_0 = const()[name = string("op_3267_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3267_to_fp16 = cast(dtype = var_3267_to_fp16_dtype_0, x = var_3267)[name = string("cast_232")]; + tensor mean_squared_123_cast_fp16 = add(x = var_3267_to_fp16, y = var_3268_to_fp16)[name = string("mean_squared_123_cast_fp16")]; + string mean_squared_123_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_123_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_123_cast_fp16_to_fp32 = cast(dtype = mean_squared_123_cast_fp16_to_fp32_dtype_0, x = mean_squared_123_cast_fp16)[name = string("cast_231")]; + tensor var_3270 = pow(x = mean_squared_123_cast_fp16_to_fp32, y = var_3223)[name = string("op_3270")]; + string clip_194_to_fp16_dtype_0 = const()[name = string("clip_194_to_fp16_dtype_0"), val = string("fp16")]; + string var_3270_to_fp16_dtype_0 = const()[name = string("op_3270_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_194_to_fp16 = cast(dtype = clip_194_to_fp16_dtype_0, x = clip_194)[name = string("cast_229")]; + tensor var_3270_to_fp16 = cast(dtype = 
var_3270_to_fp16_dtype_0, x = var_3270)[name = string("cast_230")]; + tensor normed_output_245_cast_fp16 = mul(x = clip_194_to_fp16, y = var_3270_to_fp16)[name = string("normed_output_245_cast_fp16")]; + tensor const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85447424)))]; + tensor normed_output_247_cast_fp16 = mul(x = normed_output_245_cast_fp16, y = const_105_to_fp16)[name = string("normed_output_247_cast_fp16")]; + fp16 var_3215_to_fp16 = const()[name = string("op_3215_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_717_cast_fp16 = mul(x = normed_output_247_cast_fp16, y = var_3215_to_fp16)[name = string("hidden_states_717_cast_fp16")]; + tensor hidden_states_719_cast_fp16 = add(x = hidden_states_717_cast_fp16, y = hidden_states_695_cast_fp16)[name = string("hidden_states_719_cast_fp16")]; + fp16 var_3277_to_fp16 = const()[name = string("op_3277_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3278_to_fp16 = const()[name = string("op_3278_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_195_cast_fp16 = clip(alpha = var_3277_to_fp16, beta = var_3278_to_fp16, x = hidden_states_719_cast_fp16)[name = string("clip_195_cast_fp16")]; + string clip_195_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_195_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3280 = const()[name = string("op_3280"), val = fp32(-0x1p-1)]; + fp32 var_3284_promoted = const()[name = string("op_3284_promoted"), val = fp32(0x1p+1)]; + tensor clip_195_cast_fp16_to_fp32 = cast(dtype = clip_195_cast_fp16_to_fp32_dtype_0, x = clip_195_cast_fp16)[name = string("cast_228")]; + tensor var_3290 = pow(x = clip_195_cast_fp16_to_fp32, y = var_3284_promoted)[name = string("op_3290")]; + tensor var_3292_axes_0 = const()[name = string("op_3292_axes_0"), val = tensor([-1])]; + bool var_3292_keep_dims_0 = const()[name = string("op_3292_keep_dims_0"), val = bool(true)]; + tensor var_3292 
= reduce_mean(axes = var_3292_axes_0, keep_dims = var_3292_keep_dims_0, x = var_3290)[name = string("op_3292")]; + string var_3292_to_fp16_dtype_0 = const()[name = string("op_3292_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3293_to_fp16 = const()[name = string("op_3293_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3292_to_fp16 = cast(dtype = var_3292_to_fp16_dtype_0, x = var_3292)[name = string("cast_227")]; + tensor mean_squared_125_cast_fp16 = add(x = var_3292_to_fp16, y = var_3293_to_fp16)[name = string("mean_squared_125_cast_fp16")]; + string mean_squared_125_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_125_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_125_cast_fp16_to_fp32 = cast(dtype = mean_squared_125_cast_fp16_to_fp32_dtype_0, x = mean_squared_125_cast_fp16)[name = string("cast_226")]; + tensor var_3295 = pow(x = mean_squared_125_cast_fp16_to_fp32, y = var_3280)[name = string("op_3295")]; + string var_3295_to_fp16_dtype_0 = const()[name = string("op_3295_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3295_to_fp16 = cast(dtype = var_3295_to_fp16_dtype_0, x = var_3295)[name = string("cast_225")]; + tensor normed_output_249_cast_fp16 = mul(x = clip_195_cast_fp16, y = var_3295_to_fp16)[name = string("normed_output_249_cast_fp16")]; + tensor const_106_to_fp16 = const()[name = string("const_106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85449536)))]; + tensor normed_output_251_cast_fp16 = mul(x = normed_output_249_cast_fp16, y = const_106_to_fp16)[name = string("normed_output_251_cast_fp16")]; + string normed_output_251_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_251_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3308 = const()[name = string("op_3308"), val = fp32(-0x1p-1)]; + fp32 var_3309 = const()[name = string("op_3309"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_3310 = const()[name = string("op_3310"), 
val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_251_cast_fp16_to_fp32 = cast(dtype = normed_output_251_cast_fp16_to_fp32_dtype_0, x = normed_output_251_cast_fp16)[name = string("cast_224")]; + tensor clip_196 = clip(alpha = var_3310, beta = var_3309, x = normed_output_251_cast_fp16_to_fp32)[name = string("clip_196")]; + fp32 var_3304_promoted = const()[name = string("op_3304_promoted"), val = fp32(0x1p+1)]; + tensor var_3318 = pow(x = clip_196, y = var_3304_promoted)[name = string("op_3318")]; + tensor var_3320_axes_0 = const()[name = string("op_3320_axes_0"), val = tensor([-1])]; + bool var_3320_keep_dims_0 = const()[name = string("op_3320_keep_dims_0"), val = bool(true)]; + tensor var_3320 = reduce_mean(axes = var_3320_axes_0, keep_dims = var_3320_keep_dims_0, x = var_3318)[name = string("op_3320")]; + string var_3320_to_fp16_dtype_0 = const()[name = string("op_3320_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3321_to_fp16 = const()[name = string("op_3321_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3320_to_fp16 = cast(dtype = var_3320_to_fp16_dtype_0, x = var_3320)[name = string("cast_223")]; + tensor mean_squared_127_cast_fp16 = add(x = var_3320_to_fp16, y = var_3321_to_fp16)[name = string("mean_squared_127_cast_fp16")]; + string mean_squared_127_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_127_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_127_cast_fp16_to_fp32 = cast(dtype = mean_squared_127_cast_fp16_to_fp32_dtype_0, x = mean_squared_127_cast_fp16)[name = string("cast_222")]; + tensor var_3323 = pow(x = mean_squared_127_cast_fp16_to_fp32, y = var_3308)[name = string("op_3323")]; + string clip_196_to_fp16_dtype_0 = const()[name = string("clip_196_to_fp16_dtype_0"), val = string("fp16")]; + string var_3323_to_fp16_dtype_0 = const()[name = string("op_3323_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_196_to_fp16 = cast(dtype = clip_196_to_fp16_dtype_0, x = clip_196)[name = 
string("cast_220")]; + tensor var_3323_to_fp16 = cast(dtype = var_3323_to_fp16_dtype_0, x = var_3323)[name = string("cast_221")]; + tensor normed_output_253_cast_fp16 = mul(x = clip_196_to_fp16, y = var_3323_to_fp16)[name = string("normed_output_253_cast_fp16")]; + tensor const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85451648)))]; + tensor normed_output_255_cast_fp16 = mul(x = normed_output_253_cast_fp16, y = const_107_to_fp16)[name = string("normed_output_255_cast_fp16")]; + fp16 feed_forward1s_7_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.8cp+3)]; + fp16 feed_forward1s_7_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.88p+3)]; + tensor clip_197_cast_fp16 = clip(alpha = feed_forward1s_7_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_7_ffw_layer_1_input_max_to_fp16, x = normed_output_255_cast_fp16)[name = string("clip_197_cast_fp16")]; + tensor feed_forward1s_7_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85453760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87550976))))[name = string("feed_forward1s_7_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_7_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_197_cast_fp16)[name = string("linear_78_cast_fp16")]; + fp16 feed_forward1s_7_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.98p+4)]; + fp16 feed_forward1s_7_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_1_output_max_to_fp16"), val = 
fp16(0x1.94p+4)]; + tensor clip_198_cast_fp16 = clip(alpha = feed_forward1s_7_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_7_ffw_layer_1_output_max_to_fp16, x = linear_78_cast_fp16)[name = string("clip_198_cast_fp16")]; + tensor hidden_states_735_cast_fp16 = silu(x = clip_198_cast_fp16)[name = string("hidden_states_735_cast_fp16")]; + fp16 feed_forward1s_7_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.12p+3)]; + fp16 feed_forward1s_7_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.1p+3)]; + tensor clip_199_cast_fp16 = clip(alpha = feed_forward1s_7_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_7_ffw_layer_2_input_max_to_fp16, x = hidden_states_735_cast_fp16)[name = string("clip_199_cast_fp16")]; + tensor feed_forward1s_7_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87555136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89652352))))[name = string("feed_forward1s_7_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_79_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_7_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_199_cast_fp16)[name = string("linear_79_cast_fp16")]; + fp16 feed_forward1s_7_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.52p+5)]; + fp16 feed_forward1s_7_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_7_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.5p+5)]; + tensor clip_200_cast_fp16 = clip(alpha = feed_forward1s_7_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_7_ffw_layer_2_output_max_to_fp16, x = linear_79_cast_fp16)[name = string("clip_200_cast_fp16")]; + string 
clip_200_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_200_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_200_cast_fp16_to_fp32 = cast(dtype = clip_200_cast_fp16_to_fp32_dtype_0, x = clip_200_cast_fp16)[name = string("cast_219")]; + tensor clip_201 = clip(alpha = var_3310, beta = var_3309, x = clip_200_cast_fp16_to_fp32)[name = string("clip_201")]; + fp32 var_3304_promoted_1 = const()[name = string("op_3304_promoted_1"), val = fp32(0x1p+1)]; + tensor var_3350 = pow(x = clip_201, y = var_3304_promoted_1)[name = string("op_3350")]; + tensor var_3352_axes_0 = const()[name = string("op_3352_axes_0"), val = tensor([-1])]; + bool var_3352_keep_dims_0 = const()[name = string("op_3352_keep_dims_0"), val = bool(true)]; + tensor var_3352 = reduce_mean(axes = var_3352_axes_0, keep_dims = var_3352_keep_dims_0, x = var_3350)[name = string("op_3352")]; + string var_3352_to_fp16_dtype_0 = const()[name = string("op_3352_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3353_to_fp16 = const()[name = string("op_3353_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3352_to_fp16 = cast(dtype = var_3352_to_fp16_dtype_0, x = var_3352)[name = string("cast_218")]; + tensor mean_squared_129_cast_fp16 = add(x = var_3352_to_fp16, y = var_3353_to_fp16)[name = string("mean_squared_129_cast_fp16")]; + string mean_squared_129_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_129_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_129_cast_fp16_to_fp32 = cast(dtype = mean_squared_129_cast_fp16_to_fp32_dtype_0, x = mean_squared_129_cast_fp16)[name = string("cast_217")]; + tensor var_3355 = pow(x = mean_squared_129_cast_fp16_to_fp32, y = var_3308)[name = string("op_3355")]; + string clip_201_to_fp16_dtype_0 = const()[name = string("clip_201_to_fp16_dtype_0"), val = string("fp16")]; + string var_3355_to_fp16_dtype_0 = const()[name = string("op_3355_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_201_to_fp16 = cast(dtype = 
clip_201_to_fp16_dtype_0, x = clip_201)[name = string("cast_215")]; + tensor var_3355_to_fp16 = cast(dtype = var_3355_to_fp16_dtype_0, x = var_3355)[name = string("cast_216")]; + tensor normed_output_257_cast_fp16 = mul(x = clip_201_to_fp16, y = var_3355_to_fp16)[name = string("normed_output_257_cast_fp16")]; + tensor const_108_to_fp16 = const()[name = string("const_108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89653440)))]; + tensor normed_output_259_cast_fp16 = mul(x = normed_output_257_cast_fp16, y = const_108_to_fp16)[name = string("normed_output_259_cast_fp16")]; + fp16 var_3300_to_fp16 = const()[name = string("op_3300_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_747_cast_fp16 = mul(x = normed_output_259_cast_fp16, y = var_3300_to_fp16)[name = string("hidden_states_747_cast_fp16")]; + tensor hidden_states_749_cast_fp16 = add(x = hidden_states_747_cast_fp16, y = normed_output_251_cast_fp16)[name = string("hidden_states_749_cast_fp16")]; + fp16 var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3363_to_fp16 = const()[name = string("op_3363_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_202_cast_fp16 = clip(alpha = var_3362_to_fp16, beta = var_3363_to_fp16, x = hidden_states_749_cast_fp16)[name = string("clip_202_cast_fp16")]; + string clip_202_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_202_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3365 = const()[name = string("op_3365"), val = fp32(-0x1p-1)]; + fp32 var_3369_promoted = const()[name = string("op_3369_promoted"), val = fp32(0x1p+1)]; + tensor clip_202_cast_fp16_to_fp32 = cast(dtype = clip_202_cast_fp16_to_fp32_dtype_0, x = clip_202_cast_fp16)[name = string("cast_214")]; + tensor var_3375 = pow(x = clip_202_cast_fp16_to_fp32, y = var_3369_promoted)[name = string("op_3375")]; + tensor var_3377_axes_0 = const()[name = string("op_3377_axes_0"), val = tensor([-1])]; + 
bool var_3377_keep_dims_0 = const()[name = string("op_3377_keep_dims_0"), val = bool(true)]; + tensor var_3377 = reduce_mean(axes = var_3377_axes_0, keep_dims = var_3377_keep_dims_0, x = var_3375)[name = string("op_3377")]; + string var_3377_to_fp16_dtype_0 = const()[name = string("op_3377_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3378_to_fp16 = const()[name = string("op_3378_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3377_to_fp16 = cast(dtype = var_3377_to_fp16_dtype_0, x = var_3377)[name = string("cast_213")]; + tensor mean_squared_131_cast_fp16 = add(x = var_3377_to_fp16, y = var_3378_to_fp16)[name = string("mean_squared_131_cast_fp16")]; + string mean_squared_131_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_131_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_131_cast_fp16_to_fp32 = cast(dtype = mean_squared_131_cast_fp16_to_fp32_dtype_0, x = mean_squared_131_cast_fp16)[name = string("cast_212")]; + tensor var_3380 = pow(x = mean_squared_131_cast_fp16_to_fp32, y = var_3365)[name = string("op_3380")]; + string var_3380_to_fp16_dtype_0 = const()[name = string("op_3380_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3380_to_fp16 = cast(dtype = var_3380_to_fp16_dtype_0, x = var_3380)[name = string("cast_211")]; + tensor normed_output_261_cast_fp16 = mul(x = clip_202_cast_fp16, y = var_3380_to_fp16)[name = string("normed_output_261_cast_fp16")]; + tensor const_109_to_fp16 = const()[name = string("const_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89655552)))]; + tensor normed_output_263_cast_fp16 = mul(x = normed_output_261_cast_fp16, y = const_109_to_fp16)[name = string("normed_output_263_cast_fp16")]; + int32 var_3386 = const()[name = string("op_3386"), val = int32(-1)]; + fp32 var_3387 = const()[name = string("op_3387"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_7_q_proj_input_min_to_fp16 = const()[name = 
string("self_attns_7_q_proj_input_min_to_fp16"), val = fp16(-0x1.5ep+3)]; + fp16 self_attns_7_q_proj_input_max_to_fp16 = const()[name = string("self_attns_7_q_proj_input_max_to_fp16"), val = fp16(0x1.5cp+3)]; + tensor clip_203_cast_fp16 = clip(alpha = self_attns_7_q_proj_input_min_to_fp16, beta = self_attns_7_q_proj_input_max_to_fp16, x = normed_output_263_cast_fp16)[name = string("clip_203_cast_fp16")]; + tensor self_attns_7_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89657664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90182016))))[name = string("self_attns_7_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_80_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_7_q_proj_linear_weight_to_fp16_palettized, x = clip_203_cast_fp16)[name = string("linear_80_cast_fp16")]; + fp16 self_attns_7_q_proj_output_min_to_fp16 = const()[name = string("self_attns_7_q_proj_output_min_to_fp16"), val = fp16(-0x1.18p+4)]; + fp16 self_attns_7_q_proj_output_max_to_fp16 = const()[name = string("self_attns_7_q_proj_output_max_to_fp16"), val = fp16(0x1.16p+4)]; + tensor clip_204_cast_fp16 = clip(alpha = self_attns_7_q_proj_output_min_to_fp16, beta = self_attns_7_q_proj_output_max_to_fp16, x = linear_80_cast_fp16)[name = string("clip_204_cast_fp16")]; + tensor var_3431 = const()[name = string("op_3431"), val = tensor([1, 50, 8, 128])]; + tensor q_15_cast_fp16 = reshape(shape = var_3431, x = clip_204_cast_fp16)[name = string("q_15_cast_fp16")]; + tensor self_attns_7_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90183104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90707456))))[name = string("self_attns_7_k_proj_linear_weight_to_fp16_palettized")]; + 
tensor linear_81_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_7_k_proj_linear_weight_to_fp16_palettized, x = clip_203_cast_fp16)[name = string("linear_81_cast_fp16")]; + fp16 self_attns_7_k_proj_output_min_to_fp16 = const()[name = string("self_attns_7_k_proj_output_min_to_fp16"), val = fp16(-0x1.18p+4)]; + fp16 self_attns_7_k_proj_output_max_to_fp16 = const()[name = string("self_attns_7_k_proj_output_max_to_fp16"), val = fp16(0x1.16p+4)]; + tensor clip_206_cast_fp16 = clip(alpha = self_attns_7_k_proj_output_min_to_fp16, beta = self_attns_7_k_proj_output_max_to_fp16, x = linear_81_cast_fp16)[name = string("clip_206_cast_fp16")]; + tensor var_3443 = const()[name = string("op_3443"), val = tensor([1, 50, 8, 128])]; + tensor k_15_cast_fp16 = reshape(shape = var_3443, x = clip_206_cast_fp16)[name = string("k_15_cast_fp16")]; + tensor self_attns_7_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90708544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91232896))))[name = string("self_attns_7_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_82_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_7_v_proj_linear_weight_to_fp16_palettized, x = clip_203_cast_fp16)[name = string("linear_82_cast_fp16")]; + fp16 self_attns_7_v_proj_output_min_to_fp16 = const()[name = string("self_attns_7_v_proj_output_min_to_fp16"), val = fp16(-0x1.18p+4)]; + fp16 self_attns_7_v_proj_output_max_to_fp16 = const()[name = string("self_attns_7_v_proj_output_max_to_fp16"), val = fp16(0x1.16p+4)]; + tensor clip_208_cast_fp16 = clip(alpha = self_attns_7_v_proj_output_min_to_fp16, beta = self_attns_7_v_proj_output_max_to_fp16, x = linear_82_cast_fp16)[name = string("clip_208_cast_fp16")]; + tensor var_3455 = const()[name = string("op_3455"), val = tensor([1, 50, 8, 128])]; + tensor input_327_cast_fp16 
= reshape(shape = var_3455, x = clip_208_cast_fp16)[name = string("input_327_cast_fp16")]; + fp16 var_3457_to_fp16 = const()[name = string("op_3457_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_3458_cast_fp16 = mul(x = q_15_cast_fp16, y = var_3457_to_fp16)[name = string("op_3458_cast_fp16")]; + tensor var_3459_to_fp16 = const()[name = string("op_3459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91233984)))]; + tensor input_323_cast_fp16 = mul(x = var_3458_cast_fp16, y = var_3459_to_fp16)[name = string("input_323_cast_fp16")]; + fp16 var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_325_cast_fp16 = mul(x = k_15_cast_fp16, y = var_3461_to_fp16)[name = string("input_325_cast_fp16")]; + tensor q_padded_15_pad_0 = const()[name = string("q_padded_15_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_15_mode_0 = const()[name = string("q_padded_15_mode_0"), val = string("constant")]; + fp16 const_110_to_fp16 = const()[name = string("const_110_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_15_cast_fp16 = pad(constant_val = const_110_to_fp16, mode = q_padded_15_mode_0, pad = q_padded_15_pad_0, x = input_323_cast_fp16)[name = string("q_padded_15_cast_fp16")]; + tensor var_3465 = const()[name = string("op_3465"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_15_cast_fp16 = reshape(shape = var_3465, x = q_padded_15_cast_fp16)[name = string("q_blocks_15_cast_fp16")]; + tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; + fp16 const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_15_cast_fp16 = pad(constant_val = const_111_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_325_cast_fp16)[name = 
string("k_padded_15_cast_fp16")]; + tensor v_padded_15_pad_0 = const()[name = string("v_padded_15_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; + fp16 const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_15_cast_fp16 = pad(constant_val = const_112_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = input_327_cast_fp16)[name = string("v_padded_15_cast_fp16")]; + tensor var_3472_begin_0 = const()[name = string("op_3472_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3472_end_0 = const()[name = string("op_3472_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_3472_end_mask_0 = const()[name = string("op_3472_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3472_cast_fp16 = slice_by_index(begin = var_3472_begin_0, end = var_3472_end_0, end_mask = var_3472_end_mask_0, x = k_padded_15_cast_fp16)[name = string("op_3472_cast_fp16")]; + tensor var_3474_begin_0 = const()[name = string("op_3474_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_3474_end_0 = const()[name = string("op_3474_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_3474_end_mask_0 = const()[name = string("op_3474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3474_cast_fp16 = slice_by_index(begin = var_3474_begin_0, end = var_3474_end_0, end_mask = var_3474_end_mask_0, x = k_padded_15_cast_fp16)[name = string("op_3474_cast_fp16")]; + tensor var_3476_begin_0 = const()[name = string("op_3476_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_3476_end_0 = const()[name = string("op_3476_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_3476_end_mask_0 = const()[name = string("op_3476_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3476_cast_fp16 = slice_by_index(begin = var_3476_begin_0, end = var_3476_end_0, end_mask = var_3476_end_mask_0, x = 
k_padded_15_cast_fp16)[name = string("op_3476_cast_fp16")]; + tensor var_3478_begin_0 = const()[name = string("op_3478_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_3478_end_0 = const()[name = string("op_3478_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_3478_end_mask_0 = const()[name = string("op_3478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3478_cast_fp16 = slice_by_index(begin = var_3478_begin_0, end = var_3478_end_0, end_mask = var_3478_end_mask_0, x = k_padded_15_cast_fp16)[name = string("op_3478_cast_fp16")]; + tensor var_3480_begin_0 = const()[name = string("op_3480_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_3480_end_0 = const()[name = string("op_3480_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_3480_end_mask_0 = const()[name = string("op_3480_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3480_cast_fp16 = slice_by_index(begin = var_3480_begin_0, end = var_3480_end_0, end_mask = var_3480_end_mask_0, x = k_padded_15_cast_fp16)[name = string("op_3480_cast_fp16")]; + int32 k_blocks_15_axis_0 = const()[name = string("k_blocks_15_axis_0"), val = int32(1)]; + tensor k_blocks_15_cast_fp16 = stack(axis = k_blocks_15_axis_0, values = (var_3472_cast_fp16, var_3474_cast_fp16, var_3476_cast_fp16, var_3478_cast_fp16, var_3480_cast_fp16))[name = string("k_blocks_15_cast_fp16")]; + tensor var_3484_begin_0 = const()[name = string("op_3484_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3484_end_0 = const()[name = string("op_3484_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_3484_end_mask_0 = const()[name = string("op_3484_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3484_cast_fp16 = slice_by_index(begin = var_3484_begin_0, end = var_3484_end_0, end_mask = var_3484_end_mask_0, x = v_padded_15_cast_fp16)[name = string("op_3484_cast_fp16")]; + tensor var_3486_begin_0 = const()[name = string("op_3486_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor 
var_3486_end_0 = const()[name = string("op_3486_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_3486_end_mask_0 = const()[name = string("op_3486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3486_cast_fp16 = slice_by_index(begin = var_3486_begin_0, end = var_3486_end_0, end_mask = var_3486_end_mask_0, x = v_padded_15_cast_fp16)[name = string("op_3486_cast_fp16")]; + tensor var_3488_begin_0 = const()[name = string("op_3488_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_3488_end_0 = const()[name = string("op_3488_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_3488_end_mask_0 = const()[name = string("op_3488_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3488_cast_fp16 = slice_by_index(begin = var_3488_begin_0, end = var_3488_end_0, end_mask = var_3488_end_mask_0, x = v_padded_15_cast_fp16)[name = string("op_3488_cast_fp16")]; + tensor var_3490_begin_0 = const()[name = string("op_3490_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_3490_end_0 = const()[name = string("op_3490_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_3490_end_mask_0 = const()[name = string("op_3490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3490_cast_fp16 = slice_by_index(begin = var_3490_begin_0, end = var_3490_end_0, end_mask = var_3490_end_mask_0, x = v_padded_15_cast_fp16)[name = string("op_3490_cast_fp16")]; + tensor var_3492_begin_0 = const()[name = string("op_3492_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_3492_end_0 = const()[name = string("op_3492_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_3492_end_mask_0 = const()[name = string("op_3492_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3492_cast_fp16 = slice_by_index(begin = var_3492_begin_0, end = var_3492_end_0, end_mask = var_3492_end_mask_0, x = v_padded_15_cast_fp16)[name = string("op_3492_cast_fp16")]; + int32 v_blocks_15_axis_0 = const()[name = string("v_blocks_15_axis_0"), val = 
int32(1)]; + tensor v_blocks_15_cast_fp16 = stack(axis = v_blocks_15_axis_0, values = (var_3484_cast_fp16, var_3486_cast_fp16, var_3488_cast_fp16, var_3490_cast_fp16, var_3492_cast_fp16))[name = string("v_blocks_15_cast_fp16")]; + tensor var_3500 = const()[name = string("op_3500"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_3502 = const()[name = string("op_3502"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_15_transpose_x_0 = const()[name = string("matrix_ac_15_transpose_x_0"), val = bool(false)]; + bool matrix_ac_15_transpose_y_0 = const()[name = string("matrix_ac_15_transpose_y_0"), val = bool(false)]; + tensor queries_15_cast_fp16 = transpose(perm = var_3500, x = q_blocks_15_cast_fp16)[name = string("transpose_28")]; + tensor keys_t_15_cast_fp16 = transpose(perm = var_3502, x = k_blocks_15_cast_fp16)[name = string("transpose_29")]; + tensor matrix_ac_15_cast_fp16 = matmul(transpose_x = matrix_ac_15_transpose_x_0, transpose_y = matrix_ac_15_transpose_y_0, x = queries_15_cast_fp16, y = keys_t_15_cast_fp16)[name = string("matrix_ac_15_cast_fp16")]; + tensor var_3505 = const()[name = string("op_3505"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_15_cast_fp16 = reshape(shape = var_3505, x = queries_15_cast_fp16)[name = string("q_flat_15_cast_fp16")]; + bool matrix_bd_71_transpose_x_0 = const()[name = string("matrix_bd_71_transpose_x_0"), val = bool(false)]; + bool matrix_bd_71_transpose_y_0 = const()[name = string("matrix_bd_71_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_15_to_fp16 = const()[name = string("rel_k_t_15_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91234304)))]; + tensor matrix_bd_71_cast_fp16 = matmul(transpose_x = matrix_bd_71_transpose_x_0, transpose_y = matrix_bd_71_transpose_y_0, x = q_flat_15_cast_fp16, y = rel_k_t_15_to_fp16)[name = string("matrix_bd_71_cast_fp16")]; + tensor var_3510 = const()[name = string("op_3510"), val = tensor([1, 8, 5, 12, 13])]; + tensor 
input_329_cast_fp16 = reshape(shape = var_3510, x = matrix_bd_71_cast_fp16)[name = string("input_329_cast_fp16")]; + tensor matrix_bd_73_pad_0 = const()[name = string("matrix_bd_73_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91260992)))]; + string matrix_bd_73_mode_0 = const()[name = string("matrix_bd_73_mode_0"), val = string("constant")]; + fp16 const_114_to_fp16 = const()[name = string("const_114_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_73_cast_fp16 = pad(constant_val = const_114_to_fp16, mode = matrix_bd_73_mode_0, pad = matrix_bd_73_pad_0, x = input_329_cast_fp16)[name = string("matrix_bd_73_cast_fp16")]; + tensor var_3514 = const()[name = string("op_3514"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_75_cast_fp16 = reshape(shape = var_3514, x = matrix_bd_73_cast_fp16)[name = string("matrix_bd_75_cast_fp16")]; + tensor matrix_bd_77_begin_0 = const()[name = string("matrix_bd_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_77_end_0 = const()[name = string("matrix_bd_77_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_77_end_mask_0 = const()[name = string("matrix_bd_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_77_cast_fp16 = slice_by_index(begin = matrix_bd_77_begin_0, end = matrix_bd_77_end_0, end_mask = matrix_bd_77_end_mask_0, x = matrix_bd_75_cast_fp16)[name = string("matrix_bd_77_cast_fp16")]; + tensor var_3520 = const()[name = string("op_3520"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_79_cast_fp16 = reshape(shape = var_3520, x = matrix_bd_77_cast_fp16)[name = string("matrix_bd_79_cast_fp16")]; + tensor attn_43_cast_fp16 = add(x = matrix_ac_15_cast_fp16, y = matrix_bd_79_cast_fp16)[name = string("attn_43_cast_fp16")]; + fp16 _inversed_3523_y_0_to_fp16 = const()[name = string("_inversed_3523_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_3523_cast_fp16 = mul(x = attn_43_cast_fp16, y = 
_inversed_3523_y_0_to_fp16)[name = string("_inversed_3523_cast_fp16")]; + string _inversed_3523_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_3523_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_3523_cast_fp16_to_fp32 = cast(dtype = _inversed_3523_cast_fp16_to_fp32_dtype_0, x = _inversed_3523_cast_fp16)[name = string("cast_210")]; + tensor var_3524 = tanh(x = _inversed_3523_cast_fp16_to_fp32)[name = string("op_3524")]; + string var_3524_to_fp16_dtype_0 = const()[name = string("op_3524_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_7_softcap_to_fp16 = const()[name = string("self_attns_7_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_3524_to_fp16 = cast(dtype = var_3524_to_fp16_dtype_0, x = var_3524)[name = string("cast_209")]; + tensor attn_45_cast_fp16 = mul(x = var_3524_to_fp16, y = self_attns_7_softcap_to_fp16)[name = string("attn_45_cast_fp16")]; + string attn_45_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_45_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_45_cast_fp16_to_fp32 = cast(dtype = attn_45_cast_fp16_to_fp32_dtype_0, x = attn_45_cast_fp16)[name = string("cast_208")]; + tensor input_331 = select(a = var_3387, b = attn_45_cast_fp16_to_fp32, cond = var_460)[name = string("input_331")]; + tensor var_3528 = softmax(axis = var_3386, x = input_331)[name = string("op_3528")]; + tensor var_3530 = const()[name = string("op_3530"), val = tensor([0, 3, 1, -3, -1])]; + bool out_43_transpose_x_0 = const()[name = string("out_43_transpose_x_0"), val = bool(false)]; + bool out_43_transpose_y_0 = const()[name = string("out_43_transpose_y_0"), val = bool(false)]; + string var_3528_to_fp16_dtype_0 = const()[name = string("op_3528_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_15_cast_fp16 = transpose(perm = var_3530, x = v_blocks_15_cast_fp16)[name = string("transpose_27")]; + tensor var_3528_to_fp16 = cast(dtype = var_3528_to_fp16_dtype_0, x = var_3528)[name = 
string("cast_207")]; + tensor out_43_cast_fp16 = matmul(transpose_x = out_43_transpose_x_0, transpose_y = out_43_transpose_y_0, x = var_3528_to_fp16, y = values_t_15_cast_fp16)[name = string("out_43_cast_fp16")]; + tensor var_3533 = const()[name = string("op_3533"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_3535 = const()[name = string("op_3535"), val = tensor([1, 60, 1024])]; + tensor var_3534_cast_fp16 = transpose(perm = var_3533, x = out_43_cast_fp16)[name = string("transpose_26")]; + tensor out_45_cast_fp16 = reshape(shape = var_3535, x = var_3534_cast_fp16)[name = string("out_45_cast_fp16")]; + tensor var_3538_begin_0 = const()[name = string("op_3538_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3538_end_0 = const()[name = string("op_3538_end_0"), val = tensor([1, 50, 1024])]; + tensor var_3538_end_mask_0 = const()[name = string("op_3538_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3538_cast_fp16 = slice_by_index(begin = var_3538_begin_0, end = var_3538_end_0, end_mask = var_3538_end_mask_0, x = out_45_cast_fp16)[name = string("op_3538_cast_fp16")]; + fp16 self_attns_7_post_input_min_to_fp16 = const()[name = string("self_attns_7_post_input_min_to_fp16"), val = fp16(-0x1.12p+4)]; + fp16 self_attns_7_post_input_max_to_fp16 = const()[name = string("self_attns_7_post_input_max_to_fp16"), val = fp16(0x1.1p+4)]; + tensor clip_209_cast_fp16 = clip(alpha = self_attns_7_post_input_min_to_fp16, beta = self_attns_7_post_input_max_to_fp16, x = var_3538_cast_fp16)[name = string("clip_209_cast_fp16")]; + tensor self_attns_7_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91261120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91785472))))[name = string("self_attns_7_post_linear_weight_to_fp16_palettized")]; + tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = 
self_attns_7_post_linear_weight_to_fp16_palettized, x = clip_209_cast_fp16)[name = string("linear_84_cast_fp16")]; + fp16 self_attns_7_post_output_min_to_fp16 = const()[name = string("self_attns_7_post_output_min_to_fp16"), val = fp16(-0x1.c8p+5)]; + fp16 self_attns_7_post_output_max_to_fp16 = const()[name = string("self_attns_7_post_output_max_to_fp16"), val = fp16(0x1.c4p+5)]; + tensor clip_210_cast_fp16 = clip(alpha = self_attns_7_post_output_min_to_fp16, beta = self_attns_7_post_output_max_to_fp16, x = linear_84_cast_fp16)[name = string("clip_210_cast_fp16")]; + fp16 var_3550_to_fp16 = const()[name = string("op_3550_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3551_to_fp16 = const()[name = string("op_3551_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_211_cast_fp16 = clip(alpha = var_3550_to_fp16, beta = var_3551_to_fp16, x = clip_210_cast_fp16)[name = string("clip_211_cast_fp16")]; + string clip_211_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_211_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3553 = const()[name = string("op_3553"), val = fp32(-0x1p-1)]; + fp32 var_3557_promoted = const()[name = string("op_3557_promoted"), val = fp32(0x1p+1)]; + tensor clip_211_cast_fp16_to_fp32 = cast(dtype = clip_211_cast_fp16_to_fp32_dtype_0, x = clip_211_cast_fp16)[name = string("cast_206")]; + tensor var_3563 = pow(x = clip_211_cast_fp16_to_fp32, y = var_3557_promoted)[name = string("op_3563")]; + tensor var_3565_axes_0 = const()[name = string("op_3565_axes_0"), val = tensor([-1])]; + bool var_3565_keep_dims_0 = const()[name = string("op_3565_keep_dims_0"), val = bool(true)]; + tensor var_3565 = reduce_mean(axes = var_3565_axes_0, keep_dims = var_3565_keep_dims_0, x = var_3563)[name = string("op_3565")]; + string var_3565_to_fp16_dtype_0 = const()[name = string("op_3565_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3566_to_fp16 = const()[name = string("op_3566_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3565_to_fp16 = 
cast(dtype = var_3565_to_fp16_dtype_0, x = var_3565)[name = string("cast_205")]; + tensor mean_squared_133_cast_fp16 = add(x = var_3565_to_fp16, y = var_3566_to_fp16)[name = string("mean_squared_133_cast_fp16")]; + string mean_squared_133_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_133_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_133_cast_fp16_to_fp32 = cast(dtype = mean_squared_133_cast_fp16_to_fp32_dtype_0, x = mean_squared_133_cast_fp16)[name = string("cast_204")]; + tensor var_3568 = pow(x = mean_squared_133_cast_fp16_to_fp32, y = var_3553)[name = string("op_3568")]; + string var_3568_to_fp16_dtype_0 = const()[name = string("op_3568_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3568_to_fp16 = cast(dtype = var_3568_to_fp16_dtype_0, x = var_3568)[name = string("cast_203")]; + tensor normed_output_265_cast_fp16 = mul(x = clip_211_cast_fp16, y = var_3568_to_fp16)[name = string("normed_output_265_cast_fp16")]; + tensor const_115_to_fp16 = const()[name = string("const_115_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91786560)))]; + tensor normed_output_267_cast_fp16 = mul(x = normed_output_265_cast_fp16, y = const_115_to_fp16)[name = string("normed_output_267_cast_fp16")]; + tensor hidden_states_775_cast_fp16 = add(x = normed_output_267_cast_fp16, y = hidden_states_749_cast_fp16)[name = string("hidden_states_775_cast_fp16")]; + string hidden_states_775_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_775_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3575 = const()[name = string("op_3575"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_3576 = const()[name = string("op_3576"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_3588 = const()[name = string("op_3588"), val = fp32(-0x1p-1)]; + fp32 var_3584_promoted = const()[name = string("op_3584_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_775_cast_fp16_to_fp32 = cast(dtype = 
hidden_states_775_cast_fp16_to_fp32_dtype_0, x = hidden_states_775_cast_fp16)[name = string("cast_202")]; + tensor var_3596 = pow(x = hidden_states_775_cast_fp16_to_fp32, y = var_3584_promoted)[name = string("op_3596")]; + tensor var_3598_axes_0 = const()[name = string("op_3598_axes_0"), val = tensor([-1])]; + bool var_3598_keep_dims_0 = const()[name = string("op_3598_keep_dims_0"), val = bool(true)]; + tensor var_3598 = reduce_mean(axes = var_3598_axes_0, keep_dims = var_3598_keep_dims_0, x = var_3596)[name = string("op_3598")]; + string var_3598_to_fp16_dtype_0 = const()[name = string("op_3598_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3599_to_fp16 = const()[name = string("op_3599_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3598_to_fp16 = cast(dtype = var_3598_to_fp16_dtype_0, x = var_3598)[name = string("cast_201")]; + tensor mean_squared_135_cast_fp16 = add(x = var_3598_to_fp16, y = var_3599_to_fp16)[name = string("mean_squared_135_cast_fp16")]; + string mean_squared_135_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_135_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_135_cast_fp16_to_fp32 = cast(dtype = mean_squared_135_cast_fp16_to_fp32_dtype_0, x = mean_squared_135_cast_fp16)[name = string("cast_200")]; + tensor var_3601 = pow(x = mean_squared_135_cast_fp16_to_fp32, y = var_3588)[name = string("op_3601")]; + string var_3601_to_fp16_dtype_0 = const()[name = string("op_3601_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3601_to_fp16 = cast(dtype = var_3601_to_fp16_dtype_0, x = var_3601)[name = string("cast_199")]; + tensor normed_output_269_cast_fp16 = mul(x = hidden_states_775_cast_fp16, y = var_3601_to_fp16)[name = string("normed_output_269_cast_fp16")]; + tensor const_116_to_fp16 = const()[name = string("const_116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91788672)))]; + tensor normed_output_271_cast_fp16 = mul(x = 
normed_output_269_cast_fp16, y = const_116_to_fp16)[name = string("normed_output_271_cast_fp16")]; + fp16 lconv1ds_7_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_7_linear_start_input_min_to_fp16"), val = fp16(-0x1.58p+3)]; + fp16 lconv1ds_7_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_7_linear_start_input_max_to_fp16"), val = fp16(0x1.54p+3)]; + tensor clip_212_cast_fp16 = clip(alpha = lconv1ds_7_linear_start_input_min_to_fp16, beta = lconv1ds_7_linear_start_input_max_to_fp16, x = normed_output_271_cast_fp16)[name = string("clip_212_cast_fp16")]; + tensor lconv1ds_7_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91790784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92839424))))[name = string("lconv1ds_7_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_85_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_7_linear_start_linear_weight_to_fp16_palettized, x = clip_212_cast_fp16)[name = string("linear_85_cast_fp16")]; + fp16 lconv1ds_7_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_7_linear_start_output_min_to_fp16"), val = fp16(-0x1.74p+4)]; + fp16 lconv1ds_7_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_7_linear_start_output_max_to_fp16"), val = fp16(0x1.72p+4)]; + tensor clip_213_cast_fp16 = clip(alpha = lconv1ds_7_linear_start_output_min_to_fp16, beta = lconv1ds_7_linear_start_output_max_to_fp16, x = linear_85_cast_fp16)[name = string("clip_213_cast_fp16")]; + int32 hidden_states_783_split_num_splits_0 = const()[name = string("hidden_states_783_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_783_split_axis_0 = const()[name = string("hidden_states_783_split_axis_0"), val = int32(-1)]; + tensor hidden_states_783_split_cast_fp16_0, tensor hidden_states_783_split_cast_fp16_1 = 
split(axis = hidden_states_783_split_axis_0, num_splits = hidden_states_783_split_num_splits_0, x = clip_213_cast_fp16)[name = string("hidden_states_783_split_cast_fp16")]; + tensor hidden_states_783_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_783_split_cast_fp16_1)[name = string("hidden_states_783_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_783_cast_fp16 = mul(x = hidden_states_783_split_cast_fp16_0, y = hidden_states_783_split_1_sigmoid_cast_fp16)[name = string("hidden_states_783_cast_fp16")]; + tensor input_339_perm_0 = const()[name = string("input_339_perm_0"), val = tensor([0, 2, 1])]; + tensor input_341_pad_0 = const()[name = string("input_341_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_341_mode_0 = const()[name = string("input_341_mode_0"), val = string("constant")]; + fp16 const_117_to_fp16 = const()[name = string("const_117_to_fp16"), val = fp16(0x0p+0)]; + tensor input_339_cast_fp16 = transpose(perm = input_339_perm_0, x = hidden_states_783_cast_fp16)[name = string("transpose_25")]; + tensor input_341_cast_fp16 = pad(constant_val = const_117_to_fp16, mode = input_341_mode_0, pad = input_341_pad_0, x = input_339_cast_fp16)[name = string("input_341_cast_fp16")]; + string var_3627_pad_type_0 = const()[name = string("op_3627_pad_type_0"), val = string("valid")]; + int32 var_3627_groups_0 = const()[name = string("op_3627_groups_0"), val = int32(1024)]; + tensor var_3627_strides_0 = const()[name = string("op_3627_strides_0"), val = tensor([1])]; + tensor var_3627_pad_0 = const()[name = string("op_3627_pad_0"), val = tensor([0, 0])]; + tensor var_3627_dilations_0 = const()[name = string("op_3627_dilations_0"), val = tensor([1])]; + tensor lconv1ds_7_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92841536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92844160))))[name = 
string("lconv1ds_7_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_3627_cast_fp16 = conv(dilations = var_3627_dilations_0, groups = var_3627_groups_0, pad = var_3627_pad_0, pad_type = var_3627_pad_type_0, strides = var_3627_strides_0, weight = lconv1ds_7_depthwise_conv1d_weight_to_fp16_palettized, x = input_341_cast_fp16)[name = string("op_3627_cast_fp16")]; + tensor hidden_states_785_perm_0 = const()[name = string("hidden_states_785_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_785_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_785_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_785_cast_fp16 = transpose(perm = hidden_states_785_perm_0, x = var_3627_cast_fp16)[name = string("transpose_24")]; + tensor hidden_states_785_cast_fp16_to_fp32 = cast(dtype = hidden_states_785_cast_fp16_to_fp32_dtype_0, x = hidden_states_785_cast_fp16)[name = string("cast_198")]; + tensor clip_214 = clip(alpha = var_3576, beta = var_3575, x = hidden_states_785_cast_fp16_to_fp32)[name = string("clip_214")]; + fp32 var_3584_promoted_1 = const()[name = string("op_3584_promoted_1"), val = fp32(0x1p+1)]; + tensor var_3632 = pow(x = clip_214, y = var_3584_promoted_1)[name = string("op_3632")]; + tensor var_3634_axes_0 = const()[name = string("op_3634_axes_0"), val = tensor([-1])]; + bool var_3634_keep_dims_0 = const()[name = string("op_3634_keep_dims_0"), val = bool(true)]; + tensor var_3634 = reduce_mean(axes = var_3634_axes_0, keep_dims = var_3634_keep_dims_0, x = var_3632)[name = string("op_3634")]; + string var_3634_to_fp16_dtype_0 = const()[name = string("op_3634_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3635_to_fp16 = const()[name = string("op_3635_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3634_to_fp16 = cast(dtype = var_3634_to_fp16_dtype_0, x = var_3634)[name = string("cast_197")]; + tensor mean_squared_137_cast_fp16 = add(x = var_3634_to_fp16, y = var_3635_to_fp16)[name = 
string("mean_squared_137_cast_fp16")]; + string mean_squared_137_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_137_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_137_cast_fp16_to_fp32 = cast(dtype = mean_squared_137_cast_fp16_to_fp32_dtype_0, x = mean_squared_137_cast_fp16)[name = string("cast_196")]; + tensor var_3637 = pow(x = mean_squared_137_cast_fp16_to_fp32, y = var_3588)[name = string("op_3637")]; + string clip_214_to_fp16_dtype_0 = const()[name = string("clip_214_to_fp16_dtype_0"), val = string("fp16")]; + string var_3637_to_fp16_dtype_0 = const()[name = string("op_3637_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_214_to_fp16 = cast(dtype = clip_214_to_fp16_dtype_0, x = clip_214)[name = string("cast_194")]; + tensor var_3637_to_fp16 = cast(dtype = var_3637_to_fp16_dtype_0, x = var_3637)[name = string("cast_195")]; + tensor normed_output_273_cast_fp16 = mul(x = clip_214_to_fp16, y = var_3637_to_fp16)[name = string("normed_output_273_cast_fp16")]; + tensor const_118_to_fp16 = const()[name = string("const_118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92845248)))]; + tensor normed_output_275_cast_fp16 = mul(x = normed_output_273_cast_fp16, y = const_118_to_fp16)[name = string("normed_output_275_cast_fp16")]; + tensor hidden_states_791_cast_fp16 = silu(x = normed_output_275_cast_fp16)[name = string("hidden_states_791_cast_fp16")]; + fp16 lconv1ds_7_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_7_linear_end_input_min_to_fp16"), val = fp16(-0x1p+3)]; + fp16 lconv1ds_7_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_7_linear_end_input_max_to_fp16"), val = fp16(0x1.fcp+2)]; + tensor clip_215_cast_fp16 = clip(alpha = lconv1ds_7_linear_end_input_min_to_fp16, beta = lconv1ds_7_linear_end_input_max_to_fp16, x = hidden_states_791_cast_fp16)[name = string("clip_215_cast_fp16")]; + tensor 
lconv1ds_7_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92847360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93371712))))[name = string("lconv1ds_7_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_86_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_7_linear_end_linear_weight_to_fp16_palettized, x = clip_215_cast_fp16)[name = string("linear_86_cast_fp16")]; + fp16 lconv1ds_7_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_7_linear_end_output_min_to_fp16"), val = fp16(-0x1.cap+2)]; + fp16 lconv1ds_7_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_7_linear_end_output_max_to_fp16"), val = fp16(0x1.c8p+2)]; + tensor clip_216_cast_fp16 = clip(alpha = lconv1ds_7_linear_end_output_min_to_fp16, beta = lconv1ds_7_linear_end_output_max_to_fp16, x = linear_86_cast_fp16)[name = string("clip_216_cast_fp16")]; + tensor hidden_states_797_cast_fp16 = add(x = clip_216_cast_fp16, y = hidden_states_775_cast_fp16)[name = string("hidden_states_797_cast_fp16")]; + string hidden_states_797_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_797_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3661 = const()[name = string("op_3661"), val = fp32(-0x1p-1)]; + fp32 var_3662 = const()[name = string("op_3662"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_3663 = const()[name = string("op_3663"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_797_cast_fp16_to_fp32 = cast(dtype = hidden_states_797_cast_fp16_to_fp32_dtype_0, x = hidden_states_797_cast_fp16)[name = string("cast_193")]; + tensor clip_217 = clip(alpha = var_3663, beta = var_3662, x = hidden_states_797_cast_fp16_to_fp32)[name = string("clip_217")]; + fp32 var_3657_promoted = const()[name = string("op_3657_promoted"), val = fp32(0x1p+1)]; + tensor var_3671 = pow(x = clip_217, y = 
var_3657_promoted)[name = string("op_3671")]; + tensor var_3673_axes_0 = const()[name = string("op_3673_axes_0"), val = tensor([-1])]; + bool var_3673_keep_dims_0 = const()[name = string("op_3673_keep_dims_0"), val = bool(true)]; + tensor var_3673 = reduce_mean(axes = var_3673_axes_0, keep_dims = var_3673_keep_dims_0, x = var_3671)[name = string("op_3673")]; + string var_3673_to_fp16_dtype_0 = const()[name = string("op_3673_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3674_to_fp16 = const()[name = string("op_3674_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3673_to_fp16 = cast(dtype = var_3673_to_fp16_dtype_0, x = var_3673)[name = string("cast_192")]; + tensor mean_squared_139_cast_fp16 = add(x = var_3673_to_fp16, y = var_3674_to_fp16)[name = string("mean_squared_139_cast_fp16")]; + string mean_squared_139_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_139_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_139_cast_fp16_to_fp32 = cast(dtype = mean_squared_139_cast_fp16_to_fp32_dtype_0, x = mean_squared_139_cast_fp16)[name = string("cast_191")]; + tensor var_3676 = pow(x = mean_squared_139_cast_fp16_to_fp32, y = var_3661)[name = string("op_3676")]; + string clip_217_to_fp16_dtype_0 = const()[name = string("clip_217_to_fp16_dtype_0"), val = string("fp16")]; + string var_3676_to_fp16_dtype_0 = const()[name = string("op_3676_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_217_to_fp16 = cast(dtype = clip_217_to_fp16_dtype_0, x = clip_217)[name = string("cast_189")]; + tensor var_3676_to_fp16 = cast(dtype = var_3676_to_fp16_dtype_0, x = var_3676)[name = string("cast_190")]; + tensor normed_output_277_cast_fp16 = mul(x = clip_217_to_fp16, y = var_3676_to_fp16)[name = string("normed_output_277_cast_fp16")]; + tensor const_119_to_fp16 = const()[name = string("const_119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93372800)))]; + tensor normed_output_279_cast_fp16 
= mul(x = normed_output_277_cast_fp16, y = const_119_to_fp16)[name = string("normed_output_279_cast_fp16")]; + fp16 feed_forward2s_7_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.a8p+3)]; + fp16 feed_forward2s_7_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.a4p+3)]; + tensor clip_218_cast_fp16 = clip(alpha = feed_forward2s_7_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_7_ffw_layer_1_input_max_to_fp16, x = normed_output_279_cast_fp16)[name = string("clip_218_cast_fp16")]; + tensor feed_forward2s_7_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93374912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95472128))))[name = string("feed_forward2s_7_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_7_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_218_cast_fp16)[name = string("linear_87_cast_fp16")]; + fp16 feed_forward2s_7_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.d6p+4)]; + fp16 feed_forward2s_7_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.d2p+4)]; + tensor clip_219_cast_fp16 = clip(alpha = feed_forward2s_7_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_7_ffw_layer_1_output_max_to_fp16, x = linear_87_cast_fp16)[name = string("clip_219_cast_fp16")]; + tensor hidden_states_807_cast_fp16 = silu(x = clip_219_cast_fp16)[name = string("hidden_states_807_cast_fp16")]; + fp16 feed_forward2s_7_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_2_input_min_to_fp16"), val = 
fp16(-0x1.4p+3)]; + fp16 feed_forward2s_7_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.3ep+3)]; + tensor clip_220_cast_fp16 = clip(alpha = feed_forward2s_7_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_7_ffw_layer_2_input_max_to_fp16, x = hidden_states_807_cast_fp16)[name = string("clip_220_cast_fp16")]; + tensor feed_forward2s_7_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95476288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97573504))))[name = string("feed_forward2s_7_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_88_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_7_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_220_cast_fp16)[name = string("linear_88_cast_fp16")]; + fp16 feed_forward2s_7_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.bp+5)]; + fp16 feed_forward2s_7_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_7_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.aep+5)]; + tensor clip_221_cast_fp16 = clip(alpha = feed_forward2s_7_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_7_ffw_layer_2_output_max_to_fp16, x = linear_88_cast_fp16)[name = string("clip_221_cast_fp16")]; + string clip_221_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_221_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_221_cast_fp16_to_fp32 = cast(dtype = clip_221_cast_fp16_to_fp32_dtype_0, x = clip_221_cast_fp16)[name = string("cast_188")]; + tensor clip_222 = clip(alpha = var_3663, beta = var_3662, x = clip_221_cast_fp16_to_fp32)[name = string("clip_222")]; + fp32 var_3657_promoted_1 = const()[name = string("op_3657_promoted_1"), val = fp32(0x1p+1)]; + tensor var_3703 = 
pow(x = clip_222, y = var_3657_promoted_1)[name = string("op_3703")]; + tensor var_3705_axes_0 = const()[name = string("op_3705_axes_0"), val = tensor([-1])]; + bool var_3705_keep_dims_0 = const()[name = string("op_3705_keep_dims_0"), val = bool(true)]; + tensor var_3705 = reduce_mean(axes = var_3705_axes_0, keep_dims = var_3705_keep_dims_0, x = var_3703)[name = string("op_3705")]; + string var_3705_to_fp16_dtype_0 = const()[name = string("op_3705_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3705_to_fp16 = cast(dtype = var_3705_to_fp16_dtype_0, x = var_3705)[name = string("cast_187")]; + tensor mean_squared_141_cast_fp16 = add(x = var_3705_to_fp16, y = var_3706_to_fp16)[name = string("mean_squared_141_cast_fp16")]; + string mean_squared_141_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_141_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_141_cast_fp16_to_fp32 = cast(dtype = mean_squared_141_cast_fp16_to_fp32_dtype_0, x = mean_squared_141_cast_fp16)[name = string("cast_186")]; + tensor var_3708 = pow(x = mean_squared_141_cast_fp16_to_fp32, y = var_3661)[name = string("op_3708")]; + string clip_222_to_fp16_dtype_0 = const()[name = string("clip_222_to_fp16_dtype_0"), val = string("fp16")]; + string var_3708_to_fp16_dtype_0 = const()[name = string("op_3708_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_222_to_fp16 = cast(dtype = clip_222_to_fp16_dtype_0, x = clip_222)[name = string("cast_184")]; + tensor var_3708_to_fp16 = cast(dtype = var_3708_to_fp16_dtype_0, x = var_3708)[name = string("cast_185")]; + tensor normed_output_281_cast_fp16 = mul(x = clip_222_to_fp16, y = var_3708_to_fp16)[name = string("normed_output_281_cast_fp16")]; + tensor const_120_to_fp16 = const()[name = string("const_120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97574592)))]; + tensor 
normed_output_283_cast_fp16 = mul(x = normed_output_281_cast_fp16, y = const_120_to_fp16)[name = string("normed_output_283_cast_fp16")]; + fp16 var_3653_to_fp16 = const()[name = string("op_3653_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_819_cast_fp16 = mul(x = normed_output_283_cast_fp16, y = var_3653_to_fp16)[name = string("hidden_states_819_cast_fp16")]; + tensor hidden_states_821_cast_fp16 = add(x = hidden_states_819_cast_fp16, y = hidden_states_797_cast_fp16)[name = string("hidden_states_821_cast_fp16")]; + fp16 var_3715_to_fp16 = const()[name = string("op_3715_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3716_to_fp16 = const()[name = string("op_3716_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_223_cast_fp16 = clip(alpha = var_3715_to_fp16, beta = var_3716_to_fp16, x = hidden_states_821_cast_fp16)[name = string("clip_223_cast_fp16")]; + string clip_223_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_223_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3718 = const()[name = string("op_3718"), val = fp32(-0x1p-1)]; + fp32 var_3722_promoted = const()[name = string("op_3722_promoted"), val = fp32(0x1p+1)]; + tensor clip_223_cast_fp16_to_fp32 = cast(dtype = clip_223_cast_fp16_to_fp32_dtype_0, x = clip_223_cast_fp16)[name = string("cast_183")]; + tensor var_3728 = pow(x = clip_223_cast_fp16_to_fp32, y = var_3722_promoted)[name = string("op_3728")]; + tensor var_3730_axes_0 = const()[name = string("op_3730_axes_0"), val = tensor([-1])]; + bool var_3730_keep_dims_0 = const()[name = string("op_3730_keep_dims_0"), val = bool(true)]; + tensor var_3730 = reduce_mean(axes = var_3730_axes_0, keep_dims = var_3730_keep_dims_0, x = var_3728)[name = string("op_3730")]; + string var_3730_to_fp16_dtype_0 = const()[name = string("op_3730_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3731_to_fp16 = const()[name = string("op_3731_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3730_to_fp16 = cast(dtype = var_3730_to_fp16_dtype_0, x = 
var_3730)[name = string("cast_182")]; + tensor mean_squared_143_cast_fp16 = add(x = var_3730_to_fp16, y = var_3731_to_fp16)[name = string("mean_squared_143_cast_fp16")]; + string mean_squared_143_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_143_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_143_cast_fp16_to_fp32 = cast(dtype = mean_squared_143_cast_fp16_to_fp32_dtype_0, x = mean_squared_143_cast_fp16)[name = string("cast_181")]; + tensor var_3733 = pow(x = mean_squared_143_cast_fp16_to_fp32, y = var_3718)[name = string("op_3733")]; + string var_3733_to_fp16_dtype_0 = const()[name = string("op_3733_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3733_to_fp16 = cast(dtype = var_3733_to_fp16_dtype_0, x = var_3733)[name = string("cast_180")]; + tensor normed_output_285_cast_fp16 = mul(x = clip_223_cast_fp16, y = var_3733_to_fp16)[name = string("normed_output_285_cast_fp16")]; + tensor const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97576704)))]; + tensor normed_output_287_cast_fp16 = mul(x = normed_output_285_cast_fp16, y = const_121_to_fp16)[name = string("normed_output_287_cast_fp16")]; + string normed_output_287_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_287_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3746 = const()[name = string("op_3746"), val = fp32(-0x1p-1)]; + fp32 var_3747 = const()[name = string("op_3747"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_3748 = const()[name = string("op_3748"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_287_cast_fp16_to_fp32 = cast(dtype = normed_output_287_cast_fp16_to_fp32_dtype_0, x = normed_output_287_cast_fp16)[name = string("cast_179")]; + tensor clip_224 = clip(alpha = var_3748, beta = var_3747, x = normed_output_287_cast_fp16_to_fp32)[name = string("clip_224")]; + fp32 var_3742_promoted = const()[name = 
string("op_3742_promoted"), val = fp32(0x1p+1)]; + tensor var_3756 = pow(x = clip_224, y = var_3742_promoted)[name = string("op_3756")]; + tensor var_3758_axes_0 = const()[name = string("op_3758_axes_0"), val = tensor([-1])]; + bool var_3758_keep_dims_0 = const()[name = string("op_3758_keep_dims_0"), val = bool(true)]; + tensor var_3758 = reduce_mean(axes = var_3758_axes_0, keep_dims = var_3758_keep_dims_0, x = var_3756)[name = string("op_3758")]; + string var_3758_to_fp16_dtype_0 = const()[name = string("op_3758_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3759_to_fp16 = const()[name = string("op_3759_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3758_to_fp16 = cast(dtype = var_3758_to_fp16_dtype_0, x = var_3758)[name = string("cast_178")]; + tensor mean_squared_145_cast_fp16 = add(x = var_3758_to_fp16, y = var_3759_to_fp16)[name = string("mean_squared_145_cast_fp16")]; + string mean_squared_145_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_145_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_145_cast_fp16_to_fp32 = cast(dtype = mean_squared_145_cast_fp16_to_fp32_dtype_0, x = mean_squared_145_cast_fp16)[name = string("cast_177")]; + tensor var_3761 = pow(x = mean_squared_145_cast_fp16_to_fp32, y = var_3746)[name = string("op_3761")]; + string clip_224_to_fp16_dtype_0 = const()[name = string("clip_224_to_fp16_dtype_0"), val = string("fp16")]; + string var_3761_to_fp16_dtype_0 = const()[name = string("op_3761_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_224_to_fp16 = cast(dtype = clip_224_to_fp16_dtype_0, x = clip_224)[name = string("cast_175")]; + tensor var_3761_to_fp16 = cast(dtype = var_3761_to_fp16_dtype_0, x = var_3761)[name = string("cast_176")]; + tensor normed_output_289_cast_fp16 = mul(x = clip_224_to_fp16, y = var_3761_to_fp16)[name = string("normed_output_289_cast_fp16")]; + tensor const_122_to_fp16 = const()[name = string("const_122_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(97578816)))]; + tensor normed_output_291_cast_fp16 = mul(x = normed_output_289_cast_fp16, y = const_122_to_fp16)[name = string("normed_output_291_cast_fp16")]; + fp16 feed_forward1s_8_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.66p+3)]; + fp16 feed_forward1s_8_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.64p+3)]; + tensor clip_225_cast_fp16 = clip(alpha = feed_forward1s_8_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_8_ffw_layer_1_input_max_to_fp16, x = normed_output_291_cast_fp16)[name = string("clip_225_cast_fp16")]; + tensor feed_forward1s_8_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97580928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99678144))))[name = string("feed_forward1s_8_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_8_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_225_cast_fp16)[name = string("linear_89_cast_fp16")]; + fp16 feed_forward1s_8_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.6cp+4)]; + fp16 feed_forward1s_8_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.6ap+4)]; + tensor clip_226_cast_fp16 = clip(alpha = feed_forward1s_8_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_8_ffw_layer_1_output_max_to_fp16, x = linear_89_cast_fp16)[name = string("clip_226_cast_fp16")]; + tensor hidden_states_837_cast_fp16 = silu(x = clip_226_cast_fp16)[name = string("hidden_states_837_cast_fp16")]; + fp16 
feed_forward1s_8_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.1ap+3)]; + fp16 feed_forward1s_8_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.18p+3)]; + tensor clip_227_cast_fp16 = clip(alpha = feed_forward1s_8_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_8_ffw_layer_2_input_max_to_fp16, x = hidden_states_837_cast_fp16)[name = string("clip_227_cast_fp16")]; + tensor feed_forward1s_8_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99682304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101779520))))[name = string("feed_forward1s_8_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_90_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_8_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_227_cast_fp16)[name = string("linear_90_cast_fp16")]; + fp16 feed_forward1s_8_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.36p+5)]; + fp16 feed_forward1s_8_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_8_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.34p+5)]; + tensor clip_228_cast_fp16 = clip(alpha = feed_forward1s_8_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_8_ffw_layer_2_output_max_to_fp16, x = linear_90_cast_fp16)[name = string("clip_228_cast_fp16")]; + string clip_228_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_228_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_228_cast_fp16_to_fp32 = cast(dtype = clip_228_cast_fp16_to_fp32_dtype_0, x = clip_228_cast_fp16)[name = string("cast_174")]; + tensor clip_229 = clip(alpha = var_3748, beta = var_3747, x = clip_228_cast_fp16_to_fp32)[name = 
string("clip_229")]; + fp32 var_3742_promoted_1 = const()[name = string("op_3742_promoted_1"), val = fp32(0x1p+1)]; + tensor var_3788 = pow(x = clip_229, y = var_3742_promoted_1)[name = string("op_3788")]; + tensor var_3790_axes_0 = const()[name = string("op_3790_axes_0"), val = tensor([-1])]; + bool var_3790_keep_dims_0 = const()[name = string("op_3790_keep_dims_0"), val = bool(true)]; + tensor var_3790 = reduce_mean(axes = var_3790_axes_0, keep_dims = var_3790_keep_dims_0, x = var_3788)[name = string("op_3790")]; + string var_3790_to_fp16_dtype_0 = const()[name = string("op_3790_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_3791_to_fp16 = const()[name = string("op_3791_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3790_to_fp16 = cast(dtype = var_3790_to_fp16_dtype_0, x = var_3790)[name = string("cast_173")]; + tensor mean_squared_147_cast_fp16 = add(x = var_3790_to_fp16, y = var_3791_to_fp16)[name = string("mean_squared_147_cast_fp16")]; + string mean_squared_147_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_147_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_147_cast_fp16_to_fp32 = cast(dtype = mean_squared_147_cast_fp16_to_fp32_dtype_0, x = mean_squared_147_cast_fp16)[name = string("cast_172")]; + tensor var_3793 = pow(x = mean_squared_147_cast_fp16_to_fp32, y = var_3746)[name = string("op_3793")]; + string clip_229_to_fp16_dtype_0 = const()[name = string("clip_229_to_fp16_dtype_0"), val = string("fp16")]; + string var_3793_to_fp16_dtype_0 = const()[name = string("op_3793_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_229_to_fp16 = cast(dtype = clip_229_to_fp16_dtype_0, x = clip_229)[name = string("cast_170")]; + tensor var_3793_to_fp16 = cast(dtype = var_3793_to_fp16_dtype_0, x = var_3793)[name = string("cast_171")]; + tensor normed_output_293_cast_fp16 = mul(x = clip_229_to_fp16, y = var_3793_to_fp16)[name = string("normed_output_293_cast_fp16")]; + tensor const_123_to_fp16 = const()[name = 
string("const_123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101780608)))]; + tensor normed_output_295_cast_fp16 = mul(x = normed_output_293_cast_fp16, y = const_123_to_fp16)[name = string("normed_output_295_cast_fp16")]; + fp16 var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_849_cast_fp16 = mul(x = normed_output_295_cast_fp16, y = var_3738_to_fp16)[name = string("hidden_states_849_cast_fp16")]; + tensor hidden_states_851_cast_fp16 = add(x = hidden_states_849_cast_fp16, y = normed_output_287_cast_fp16)[name = string("hidden_states_851_cast_fp16")]; + fp16 var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3801_to_fp16 = const()[name = string("op_3801_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_230_cast_fp16 = clip(alpha = var_3800_to_fp16, beta = var_3801_to_fp16, x = hidden_states_851_cast_fp16)[name = string("clip_230_cast_fp16")]; + string clip_230_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_230_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3803 = const()[name = string("op_3803"), val = fp32(-0x1p-1)]; + fp32 var_3807_promoted = const()[name = string("op_3807_promoted"), val = fp32(0x1p+1)]; + tensor clip_230_cast_fp16_to_fp32 = cast(dtype = clip_230_cast_fp16_to_fp32_dtype_0, x = clip_230_cast_fp16)[name = string("cast_169")]; + tensor var_3813 = pow(x = clip_230_cast_fp16_to_fp32, y = var_3807_promoted)[name = string("op_3813")]; + tensor var_3815_axes_0 = const()[name = string("op_3815_axes_0"), val = tensor([-1])]; + bool var_3815_keep_dims_0 = const()[name = string("op_3815_keep_dims_0"), val = bool(true)]; + tensor var_3815 = reduce_mean(axes = var_3815_axes_0, keep_dims = var_3815_keep_dims_0, x = var_3813)[name = string("op_3815")]; + string var_3815_to_fp16_dtype_0 = const()[name = string("op_3815_to_fp16_dtype_0"), val = string("fp16")]; + fp16 
var_3816_to_fp16 = const()[name = string("op_3816_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_3815_to_fp16 = cast(dtype = var_3815_to_fp16_dtype_0, x = var_3815)[name = string("cast_168")]; + tensor mean_squared_149_cast_fp16 = add(x = var_3815_to_fp16, y = var_3816_to_fp16)[name = string("mean_squared_149_cast_fp16")]; + string mean_squared_149_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_149_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_149_cast_fp16_to_fp32 = cast(dtype = mean_squared_149_cast_fp16_to_fp32_dtype_0, x = mean_squared_149_cast_fp16)[name = string("cast_167")]; + tensor var_3818 = pow(x = mean_squared_149_cast_fp16_to_fp32, y = var_3803)[name = string("op_3818")]; + string var_3818_to_fp16_dtype_0 = const()[name = string("op_3818_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_3818_to_fp16 = cast(dtype = var_3818_to_fp16_dtype_0, x = var_3818)[name = string("cast_166")]; + tensor normed_output_297_cast_fp16 = mul(x = clip_230_cast_fp16, y = var_3818_to_fp16)[name = string("normed_output_297_cast_fp16")]; + tensor const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101782720)))]; + tensor normed_output_299_cast_fp16 = mul(x = normed_output_297_cast_fp16, y = const_124_to_fp16)[name = string("normed_output_299_cast_fp16")]; + int32 var_3824 = const()[name = string("op_3824"), val = int32(-1)]; + fp32 var_3825 = const()[name = string("op_3825"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_8_q_proj_input_min_to_fp16 = const()[name = string("self_attns_8_q_proj_input_min_to_fp16"), val = fp16(-0x1.22p+3)]; + fp16 self_attns_8_q_proj_input_max_to_fp16 = const()[name = string("self_attns_8_q_proj_input_max_to_fp16"), val = fp16(0x1.2p+3)]; + tensor clip_231_cast_fp16 = clip(alpha = self_attns_8_q_proj_input_min_to_fp16, beta = self_attns_8_q_proj_input_max_to_fp16, x = 
normed_output_299_cast_fp16)[name = string("clip_231_cast_fp16")]; + tensor self_attns_8_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101784832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102309184))))[name = string("self_attns_8_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_8_q_proj_linear_weight_to_fp16_palettized, x = clip_231_cast_fp16)[name = string("linear_91_cast_fp16")]; + fp16 self_attns_8_q_proj_output_min_to_fp16 = const()[name = string("self_attns_8_q_proj_output_min_to_fp16"), val = fp16(-0x1.c2p+3)]; + fp16 self_attns_8_q_proj_output_max_to_fp16 = const()[name = string("self_attns_8_q_proj_output_max_to_fp16"), val = fp16(0x1.bep+3)]; + tensor clip_232_cast_fp16 = clip(alpha = self_attns_8_q_proj_output_min_to_fp16, beta = self_attns_8_q_proj_output_max_to_fp16, x = linear_91_cast_fp16)[name = string("clip_232_cast_fp16")]; + tensor var_3869 = const()[name = string("op_3869"), val = tensor([1, 50, 8, 128])]; + tensor q_17_cast_fp16 = reshape(shape = var_3869, x = clip_232_cast_fp16)[name = string("q_17_cast_fp16")]; + tensor self_attns_8_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102310272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102834624))))[name = string("self_attns_8_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_92_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_8_k_proj_linear_weight_to_fp16_palettized, x = clip_231_cast_fp16)[name = string("linear_92_cast_fp16")]; + fp16 self_attns_8_k_proj_output_min_to_fp16 = const()[name = string("self_attns_8_k_proj_output_min_to_fp16"), val = fp16(-0x1.c2p+3)]; + fp16 
self_attns_8_k_proj_output_max_to_fp16 = const()[name = string("self_attns_8_k_proj_output_max_to_fp16"), val = fp16(0x1.bep+3)]; + tensor clip_234_cast_fp16 = clip(alpha = self_attns_8_k_proj_output_min_to_fp16, beta = self_attns_8_k_proj_output_max_to_fp16, x = linear_92_cast_fp16)[name = string("clip_234_cast_fp16")]; + tensor var_3881 = const()[name = string("op_3881"), val = tensor([1, 50, 8, 128])]; + tensor k_17_cast_fp16 = reshape(shape = var_3881, x = clip_234_cast_fp16)[name = string("k_17_cast_fp16")]; + tensor self_attns_8_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102835712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103360064))))[name = string("self_attns_8_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_93_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_8_v_proj_linear_weight_to_fp16_palettized, x = clip_231_cast_fp16)[name = string("linear_93_cast_fp16")]; + fp16 self_attns_8_v_proj_output_min_to_fp16 = const()[name = string("self_attns_8_v_proj_output_min_to_fp16"), val = fp16(-0x1.c2p+3)]; + fp16 self_attns_8_v_proj_output_max_to_fp16 = const()[name = string("self_attns_8_v_proj_output_max_to_fp16"), val = fp16(0x1.bep+3)]; + tensor clip_236_cast_fp16 = clip(alpha = self_attns_8_v_proj_output_min_to_fp16, beta = self_attns_8_v_proj_output_max_to_fp16, x = linear_93_cast_fp16)[name = string("clip_236_cast_fp16")]; + tensor var_3893 = const()[name = string("op_3893"), val = tensor([1, 50, 8, 128])]; + tensor input_369_cast_fp16 = reshape(shape = var_3893, x = clip_236_cast_fp16)[name = string("input_369_cast_fp16")]; + fp16 var_3895_to_fp16 = const()[name = string("op_3895_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_3896_cast_fp16 = mul(x = q_17_cast_fp16, y = var_3895_to_fp16)[name = string("op_3896_cast_fp16")]; + tensor var_3897_to_fp16 = 
const()[name = string("op_3897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103361152)))]; + tensor input_365_cast_fp16 = mul(x = var_3896_cast_fp16, y = var_3897_to_fp16)[name = string("input_365_cast_fp16")]; + fp16 var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_367_cast_fp16 = mul(x = k_17_cast_fp16, y = var_3899_to_fp16)[name = string("input_367_cast_fp16")]; + tensor q_padded_17_pad_0 = const()[name = string("q_padded_17_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_17_mode_0 = const()[name = string("q_padded_17_mode_0"), val = string("constant")]; + fp16 const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_17_cast_fp16 = pad(constant_val = const_125_to_fp16, mode = q_padded_17_mode_0, pad = q_padded_17_pad_0, x = input_365_cast_fp16)[name = string("q_padded_17_cast_fp16")]; + tensor var_3903 = const()[name = string("op_3903"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_17_cast_fp16 = reshape(shape = var_3903, x = q_padded_17_cast_fp16)[name = string("q_blocks_17_cast_fp16")]; + tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; + fp16 const_126_to_fp16 = const()[name = string("const_126_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_17_cast_fp16 = pad(constant_val = const_126_to_fp16, mode = k_padded_17_mode_0, pad = k_padded_17_pad_0, x = input_367_cast_fp16)[name = string("k_padded_17_cast_fp16")]; + tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; + fp16 const_127_to_fp16 = const()[name = string("const_127_to_fp16"), val = fp16(0x0p+0)]; + 
tensor v_padded_17_cast_fp16 = pad(constant_val = const_127_to_fp16, mode = v_padded_17_mode_0, pad = v_padded_17_pad_0, x = input_369_cast_fp16)[name = string("v_padded_17_cast_fp16")]; + tensor var_3910_begin_0 = const()[name = string("op_3910_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3910_end_0 = const()[name = string("op_3910_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_3910_end_mask_0 = const()[name = string("op_3910_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3910_cast_fp16 = slice_by_index(begin = var_3910_begin_0, end = var_3910_end_0, end_mask = var_3910_end_mask_0, x = k_padded_17_cast_fp16)[name = string("op_3910_cast_fp16")]; + tensor var_3912_begin_0 = const()[name = string("op_3912_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_3912_end_0 = const()[name = string("op_3912_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_3912_end_mask_0 = const()[name = string("op_3912_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3912_cast_fp16 = slice_by_index(begin = var_3912_begin_0, end = var_3912_end_0, end_mask = var_3912_end_mask_0, x = k_padded_17_cast_fp16)[name = string("op_3912_cast_fp16")]; + tensor var_3914_begin_0 = const()[name = string("op_3914_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_3914_end_0 = const()[name = string("op_3914_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_3914_end_mask_0 = const()[name = string("op_3914_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3914_cast_fp16 = slice_by_index(begin = var_3914_begin_0, end = var_3914_end_0, end_mask = var_3914_end_mask_0, x = k_padded_17_cast_fp16)[name = string("op_3914_cast_fp16")]; + tensor var_3916_begin_0 = const()[name = string("op_3916_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_3916_end_0 = const()[name = string("op_3916_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_3916_end_mask_0 = const()[name = string("op_3916_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_3916_cast_fp16 = slice_by_index(begin = var_3916_begin_0, end = var_3916_end_0, end_mask = var_3916_end_mask_0, x = k_padded_17_cast_fp16)[name = string("op_3916_cast_fp16")]; + tensor var_3918_begin_0 = const()[name = string("op_3918_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_3918_end_0 = const()[name = string("op_3918_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_3918_end_mask_0 = const()[name = string("op_3918_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3918_cast_fp16 = slice_by_index(begin = var_3918_begin_0, end = var_3918_end_0, end_mask = var_3918_end_mask_0, x = k_padded_17_cast_fp16)[name = string("op_3918_cast_fp16")]; + int32 k_blocks_17_axis_0 = const()[name = string("k_blocks_17_axis_0"), val = int32(1)]; + tensor k_blocks_17_cast_fp16 = stack(axis = k_blocks_17_axis_0, values = (var_3910_cast_fp16, var_3912_cast_fp16, var_3914_cast_fp16, var_3916_cast_fp16, var_3918_cast_fp16))[name = string("k_blocks_17_cast_fp16")]; + tensor var_3922_begin_0 = const()[name = string("op_3922_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3922_end_0 = const()[name = string("op_3922_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_3922_end_mask_0 = const()[name = string("op_3922_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3922_cast_fp16 = slice_by_index(begin = var_3922_begin_0, end = var_3922_end_0, end_mask = var_3922_end_mask_0, x = v_padded_17_cast_fp16)[name = string("op_3922_cast_fp16")]; + tensor var_3924_begin_0 = const()[name = string("op_3924_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_3924_end_0 = const()[name = string("op_3924_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_3924_end_mask_0 = const()[name = string("op_3924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3924_cast_fp16 = slice_by_index(begin = var_3924_begin_0, end = var_3924_end_0, end_mask = var_3924_end_mask_0, x = 
v_padded_17_cast_fp16)[name = string("op_3924_cast_fp16")]; + tensor var_3926_begin_0 = const()[name = string("op_3926_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_3926_end_0 = const()[name = string("op_3926_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_3926_end_mask_0 = const()[name = string("op_3926_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3926_cast_fp16 = slice_by_index(begin = var_3926_begin_0, end = var_3926_end_0, end_mask = var_3926_end_mask_0, x = v_padded_17_cast_fp16)[name = string("op_3926_cast_fp16")]; + tensor var_3928_begin_0 = const()[name = string("op_3928_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_3928_end_0 = const()[name = string("op_3928_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_3928_end_mask_0 = const()[name = string("op_3928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3928_cast_fp16 = slice_by_index(begin = var_3928_begin_0, end = var_3928_end_0, end_mask = var_3928_end_mask_0, x = v_padded_17_cast_fp16)[name = string("op_3928_cast_fp16")]; + tensor var_3930_begin_0 = const()[name = string("op_3930_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_3930_end_0 = const()[name = string("op_3930_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_3930_end_mask_0 = const()[name = string("op_3930_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3930_cast_fp16 = slice_by_index(begin = var_3930_begin_0, end = var_3930_end_0, end_mask = var_3930_end_mask_0, x = v_padded_17_cast_fp16)[name = string("op_3930_cast_fp16")]; + int32 v_blocks_17_axis_0 = const()[name = string("v_blocks_17_axis_0"), val = int32(1)]; + tensor v_blocks_17_cast_fp16 = stack(axis = v_blocks_17_axis_0, values = (var_3922_cast_fp16, var_3924_cast_fp16, var_3926_cast_fp16, var_3928_cast_fp16, var_3930_cast_fp16))[name = string("v_blocks_17_cast_fp16")]; + tensor var_3938 = const()[name = string("op_3938"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_3940 = 
const()[name = string("op_3940"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_17_transpose_x_0 = const()[name = string("matrix_ac_17_transpose_x_0"), val = bool(false)]; + bool matrix_ac_17_transpose_y_0 = const()[name = string("matrix_ac_17_transpose_y_0"), val = bool(false)]; + tensor queries_17_cast_fp16 = transpose(perm = var_3938, x = q_blocks_17_cast_fp16)[name = string("transpose_22")]; + tensor keys_t_17_cast_fp16 = transpose(perm = var_3940, x = k_blocks_17_cast_fp16)[name = string("transpose_23")]; + tensor matrix_ac_17_cast_fp16 = matmul(transpose_x = matrix_ac_17_transpose_x_0, transpose_y = matrix_ac_17_transpose_y_0, x = queries_17_cast_fp16, y = keys_t_17_cast_fp16)[name = string("matrix_ac_17_cast_fp16")]; + tensor var_3943 = const()[name = string("op_3943"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_17_cast_fp16 = reshape(shape = var_3943, x = queries_17_cast_fp16)[name = string("q_flat_17_cast_fp16")]; + bool matrix_bd_81_transpose_x_0 = const()[name = string("matrix_bd_81_transpose_x_0"), val = bool(false)]; + bool matrix_bd_81_transpose_y_0 = const()[name = string("matrix_bd_81_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_17_to_fp16 = const()[name = string("rel_k_t_17_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103361472)))]; + tensor matrix_bd_81_cast_fp16 = matmul(transpose_x = matrix_bd_81_transpose_x_0, transpose_y = matrix_bd_81_transpose_y_0, x = q_flat_17_cast_fp16, y = rel_k_t_17_to_fp16)[name = string("matrix_bd_81_cast_fp16")]; + tensor var_3948 = const()[name = string("op_3948"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_371_cast_fp16 = reshape(shape = var_3948, x = matrix_bd_81_cast_fp16)[name = string("input_371_cast_fp16")]; + tensor matrix_bd_83_pad_0 = const()[name = string("matrix_bd_83_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103388160)))]; + string matrix_bd_83_mode_0 = const()[name = 
string("matrix_bd_83_mode_0"), val = string("constant")]; + fp16 const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_83_cast_fp16 = pad(constant_val = const_129_to_fp16, mode = matrix_bd_83_mode_0, pad = matrix_bd_83_pad_0, x = input_371_cast_fp16)[name = string("matrix_bd_83_cast_fp16")]; + tensor var_3952 = const()[name = string("op_3952"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_85_cast_fp16 = reshape(shape = var_3952, x = matrix_bd_83_cast_fp16)[name = string("matrix_bd_85_cast_fp16")]; + tensor matrix_bd_87_begin_0 = const()[name = string("matrix_bd_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_87_end_0 = const()[name = string("matrix_bd_87_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_87_end_mask_0 = const()[name = string("matrix_bd_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_87_cast_fp16 = slice_by_index(begin = matrix_bd_87_begin_0, end = matrix_bd_87_end_0, end_mask = matrix_bd_87_end_mask_0, x = matrix_bd_85_cast_fp16)[name = string("matrix_bd_87_cast_fp16")]; + tensor var_3958 = const()[name = string("op_3958"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_89_cast_fp16 = reshape(shape = var_3958, x = matrix_bd_87_cast_fp16)[name = string("matrix_bd_89_cast_fp16")]; + tensor attn_49_cast_fp16 = add(x = matrix_ac_17_cast_fp16, y = matrix_bd_89_cast_fp16)[name = string("attn_49_cast_fp16")]; + fp16 _inversed_3961_y_0_to_fp16 = const()[name = string("_inversed_3961_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_3961_cast_fp16 = mul(x = attn_49_cast_fp16, y = _inversed_3961_y_0_to_fp16)[name = string("_inversed_3961_cast_fp16")]; + string _inversed_3961_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_3961_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_3961_cast_fp16_to_fp32 = cast(dtype = _inversed_3961_cast_fp16_to_fp32_dtype_0, x = _inversed_3961_cast_fp16)[name = 
string("cast_165")]; + tensor var_3962 = tanh(x = _inversed_3961_cast_fp16_to_fp32)[name = string("op_3962")]; + string var_3962_to_fp16_dtype_0 = const()[name = string("op_3962_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_8_softcap_to_fp16 = const()[name = string("self_attns_8_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_3962_to_fp16 = cast(dtype = var_3962_to_fp16_dtype_0, x = var_3962)[name = string("cast_164")]; + tensor attn_51_cast_fp16 = mul(x = var_3962_to_fp16, y = self_attns_8_softcap_to_fp16)[name = string("attn_51_cast_fp16")]; + string attn_51_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_51_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_51_cast_fp16_to_fp32 = cast(dtype = attn_51_cast_fp16_to_fp32_dtype_0, x = attn_51_cast_fp16)[name = string("cast_163")]; + tensor input_373 = select(a = var_3825, b = attn_51_cast_fp16_to_fp32, cond = var_460)[name = string("input_373")]; + tensor var_3966 = softmax(axis = var_3824, x = input_373)[name = string("op_3966")]; + tensor var_3968 = const()[name = string("op_3968"), val = tensor([0, 3, 1, -3, -1])]; + bool out_49_transpose_x_0 = const()[name = string("out_49_transpose_x_0"), val = bool(false)]; + bool out_49_transpose_y_0 = const()[name = string("out_49_transpose_y_0"), val = bool(false)]; + string var_3966_to_fp16_dtype_0 = const()[name = string("op_3966_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_17_cast_fp16 = transpose(perm = var_3968, x = v_blocks_17_cast_fp16)[name = string("transpose_21")]; + tensor var_3966_to_fp16 = cast(dtype = var_3966_to_fp16_dtype_0, x = var_3966)[name = string("cast_162")]; + tensor out_49_cast_fp16 = matmul(transpose_x = out_49_transpose_x_0, transpose_y = out_49_transpose_y_0, x = var_3966_to_fp16, y = values_t_17_cast_fp16)[name = string("out_49_cast_fp16")]; + tensor var_3971 = const()[name = string("op_3971"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_3973 = const()[name = string("op_3973"), val = 
tensor([1, 60, 1024])]; + tensor var_3972_cast_fp16 = transpose(perm = var_3971, x = out_49_cast_fp16)[name = string("transpose_20")]; + tensor out_51_cast_fp16 = reshape(shape = var_3973, x = var_3972_cast_fp16)[name = string("out_51_cast_fp16")]; + tensor var_3976_begin_0 = const()[name = string("op_3976_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3976_end_0 = const()[name = string("op_3976_end_0"), val = tensor([1, 50, 1024])]; + tensor var_3976_end_mask_0 = const()[name = string("op_3976_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3976_cast_fp16 = slice_by_index(begin = var_3976_begin_0, end = var_3976_end_0, end_mask = var_3976_end_mask_0, x = out_51_cast_fp16)[name = string("op_3976_cast_fp16")]; + fp16 self_attns_8_post_input_min_to_fp16 = const()[name = string("self_attns_8_post_input_min_to_fp16"), val = fp16(-0x1.aap+3)]; + fp16 self_attns_8_post_input_max_to_fp16 = const()[name = string("self_attns_8_post_input_max_to_fp16"), val = fp16(0x1.a6p+3)]; + tensor clip_237_cast_fp16 = clip(alpha = self_attns_8_post_input_min_to_fp16, beta = self_attns_8_post_input_max_to_fp16, x = var_3976_cast_fp16)[name = string("clip_237_cast_fp16")]; + tensor self_attns_8_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103388288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103912640))))[name = string("self_attns_8_post_linear_weight_to_fp16_palettized")]; + tensor linear_95_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_8_post_linear_weight_to_fp16_palettized, x = clip_237_cast_fp16)[name = string("linear_95_cast_fp16")]; + fp16 self_attns_8_post_output_min_to_fp16 = const()[name = string("self_attns_8_post_output_min_to_fp16"), val = fp16(-0x1.36p+5)]; + fp16 self_attns_8_post_output_max_to_fp16 = const()[name = string("self_attns_8_post_output_max_to_fp16"), val = 
fp16(0x1.34p+5)]; + tensor clip_238_cast_fp16 = clip(alpha = self_attns_8_post_output_min_to_fp16, beta = self_attns_8_post_output_max_to_fp16, x = linear_95_cast_fp16)[name = string("clip_238_cast_fp16")]; + fp16 var_3988_to_fp16 = const()[name = string("op_3988_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_3989_to_fp16 = const()[name = string("op_3989_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_239_cast_fp16 = clip(alpha = var_3988_to_fp16, beta = var_3989_to_fp16, x = clip_238_cast_fp16)[name = string("clip_239_cast_fp16")]; + string clip_239_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_239_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_3991 = const()[name = string("op_3991"), val = fp32(-0x1p-1)]; + fp32 var_3995_promoted = const()[name = string("op_3995_promoted"), val = fp32(0x1p+1)]; + tensor clip_239_cast_fp16_to_fp32 = cast(dtype = clip_239_cast_fp16_to_fp32_dtype_0, x = clip_239_cast_fp16)[name = string("cast_161")]; + tensor var_4001 = pow(x = clip_239_cast_fp16_to_fp32, y = var_3995_promoted)[name = string("op_4001")]; + tensor var_4003_axes_0 = const()[name = string("op_4003_axes_0"), val = tensor([-1])]; + bool var_4003_keep_dims_0 = const()[name = string("op_4003_keep_dims_0"), val = bool(true)]; + tensor var_4003 = reduce_mean(axes = var_4003_axes_0, keep_dims = var_4003_keep_dims_0, x = var_4001)[name = string("op_4003")]; + string var_4003_to_fp16_dtype_0 = const()[name = string("op_4003_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4004_to_fp16 = const()[name = string("op_4004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4003_to_fp16 = cast(dtype = var_4003_to_fp16_dtype_0, x = var_4003)[name = string("cast_160")]; + tensor mean_squared_151_cast_fp16 = add(x = var_4003_to_fp16, y = var_4004_to_fp16)[name = string("mean_squared_151_cast_fp16")]; + string mean_squared_151_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_151_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor 
mean_squared_151_cast_fp16_to_fp32 = cast(dtype = mean_squared_151_cast_fp16_to_fp32_dtype_0, x = mean_squared_151_cast_fp16)[name = string("cast_159")]; + tensor var_4006 = pow(x = mean_squared_151_cast_fp16_to_fp32, y = var_3991)[name = string("op_4006")]; + string var_4006_to_fp16_dtype_0 = const()[name = string("op_4006_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4006_to_fp16 = cast(dtype = var_4006_to_fp16_dtype_0, x = var_4006)[name = string("cast_158")]; + tensor normed_output_301_cast_fp16 = mul(x = clip_239_cast_fp16, y = var_4006_to_fp16)[name = string("normed_output_301_cast_fp16")]; + tensor const_130_to_fp16 = const()[name = string("const_130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103913728)))]; + tensor normed_output_303_cast_fp16 = mul(x = normed_output_301_cast_fp16, y = const_130_to_fp16)[name = string("normed_output_303_cast_fp16")]; + tensor hidden_states_877_cast_fp16 = add(x = normed_output_303_cast_fp16, y = hidden_states_851_cast_fp16)[name = string("hidden_states_877_cast_fp16")]; + string hidden_states_877_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_877_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4013 = const()[name = string("op_4013"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4014 = const()[name = string("op_4014"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_4026 = const()[name = string("op_4026"), val = fp32(-0x1p-1)]; + fp32 var_4022_promoted = const()[name = string("op_4022_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_877_cast_fp16_to_fp32 = cast(dtype = hidden_states_877_cast_fp16_to_fp32_dtype_0, x = hidden_states_877_cast_fp16)[name = string("cast_157")]; + tensor var_4034 = pow(x = hidden_states_877_cast_fp16_to_fp32, y = var_4022_promoted)[name = string("op_4034")]; + tensor var_4036_axes_0 = const()[name = string("op_4036_axes_0"), val = tensor([-1])]; + bool var_4036_keep_dims_0 = const()[name = 
string("op_4036_keep_dims_0"), val = bool(true)]; + tensor var_4036 = reduce_mean(axes = var_4036_axes_0, keep_dims = var_4036_keep_dims_0, x = var_4034)[name = string("op_4036")]; + string var_4036_to_fp16_dtype_0 = const()[name = string("op_4036_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4037_to_fp16 = const()[name = string("op_4037_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4036_to_fp16 = cast(dtype = var_4036_to_fp16_dtype_0, x = var_4036)[name = string("cast_156")]; + tensor mean_squared_153_cast_fp16 = add(x = var_4036_to_fp16, y = var_4037_to_fp16)[name = string("mean_squared_153_cast_fp16")]; + string mean_squared_153_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_153_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_153_cast_fp16_to_fp32 = cast(dtype = mean_squared_153_cast_fp16_to_fp32_dtype_0, x = mean_squared_153_cast_fp16)[name = string("cast_155")]; + tensor var_4039 = pow(x = mean_squared_153_cast_fp16_to_fp32, y = var_4026)[name = string("op_4039")]; + string var_4039_to_fp16_dtype_0 = const()[name = string("op_4039_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4039_to_fp16 = cast(dtype = var_4039_to_fp16_dtype_0, x = var_4039)[name = string("cast_154")]; + tensor normed_output_305_cast_fp16 = mul(x = hidden_states_877_cast_fp16, y = var_4039_to_fp16)[name = string("normed_output_305_cast_fp16")]; + tensor const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103915840)))]; + tensor normed_output_307_cast_fp16 = mul(x = normed_output_305_cast_fp16, y = const_131_to_fp16)[name = string("normed_output_307_cast_fp16")]; + fp16 lconv1ds_8_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_8_linear_start_input_min_to_fp16"), val = fp16(-0x1.5ep+3)]; + fp16 lconv1ds_8_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_8_linear_start_input_max_to_fp16"), val = 
fp16(0x1.5cp+3)]; + tensor clip_240_cast_fp16 = clip(alpha = lconv1ds_8_linear_start_input_min_to_fp16, beta = lconv1ds_8_linear_start_input_max_to_fp16, x = normed_output_307_cast_fp16)[name = string("clip_240_cast_fp16")]; + tensor lconv1ds_8_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103917952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104966592))))[name = string("lconv1ds_8_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_96_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_8_linear_start_linear_weight_to_fp16_palettized, x = clip_240_cast_fp16)[name = string("linear_96_cast_fp16")]; + fp16 lconv1ds_8_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_8_linear_start_output_min_to_fp16"), val = fp16(-0x1.7ap+4)]; + fp16 lconv1ds_8_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_8_linear_start_output_max_to_fp16"), val = fp16(0x1.76p+4)]; + tensor clip_241_cast_fp16 = clip(alpha = lconv1ds_8_linear_start_output_min_to_fp16, beta = lconv1ds_8_linear_start_output_max_to_fp16, x = linear_96_cast_fp16)[name = string("clip_241_cast_fp16")]; + int32 hidden_states_885_split_num_splits_0 = const()[name = string("hidden_states_885_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_885_split_axis_0 = const()[name = string("hidden_states_885_split_axis_0"), val = int32(-1)]; + tensor hidden_states_885_split_cast_fp16_0, tensor hidden_states_885_split_cast_fp16_1 = split(axis = hidden_states_885_split_axis_0, num_splits = hidden_states_885_split_num_splits_0, x = clip_241_cast_fp16)[name = string("hidden_states_885_split_cast_fp16")]; + tensor hidden_states_885_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_885_split_cast_fp16_1)[name = string("hidden_states_885_split_1_sigmoid_cast_fp16")]; + tensor 
hidden_states_885_cast_fp16 = mul(x = hidden_states_885_split_cast_fp16_0, y = hidden_states_885_split_1_sigmoid_cast_fp16)[name = string("hidden_states_885_cast_fp16")]; + tensor input_381_perm_0 = const()[name = string("input_381_perm_0"), val = tensor([0, 2, 1])]; + tensor input_383_pad_0 = const()[name = string("input_383_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_383_mode_0 = const()[name = string("input_383_mode_0"), val = string("constant")]; + fp16 const_132_to_fp16 = const()[name = string("const_132_to_fp16"), val = fp16(0x0p+0)]; + tensor input_381_cast_fp16 = transpose(perm = input_381_perm_0, x = hidden_states_885_cast_fp16)[name = string("transpose_19")]; + tensor input_383_cast_fp16 = pad(constant_val = const_132_to_fp16, mode = input_383_mode_0, pad = input_383_pad_0, x = input_381_cast_fp16)[name = string("input_383_cast_fp16")]; + string var_4065_pad_type_0 = const()[name = string("op_4065_pad_type_0"), val = string("valid")]; + int32 var_4065_groups_0 = const()[name = string("op_4065_groups_0"), val = int32(1024)]; + tensor var_4065_strides_0 = const()[name = string("op_4065_strides_0"), val = tensor([1])]; + tensor var_4065_pad_0 = const()[name = string("op_4065_pad_0"), val = tensor([0, 0])]; + tensor var_4065_dilations_0 = const()[name = string("op_4065_dilations_0"), val = tensor([1])]; + tensor lconv1ds_8_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104968704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104971328))))[name = string("lconv1ds_8_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_4065_cast_fp16 = conv(dilations = var_4065_dilations_0, groups = var_4065_groups_0, pad = var_4065_pad_0, pad_type = var_4065_pad_type_0, strides = var_4065_strides_0, weight = lconv1ds_8_depthwise_conv1d_weight_to_fp16_palettized, x = input_383_cast_fp16)[name = 
string("op_4065_cast_fp16")]; + tensor hidden_states_887_perm_0 = const()[name = string("hidden_states_887_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_887_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_887_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_887_cast_fp16 = transpose(perm = hidden_states_887_perm_0, x = var_4065_cast_fp16)[name = string("transpose_18")]; + tensor hidden_states_887_cast_fp16_to_fp32 = cast(dtype = hidden_states_887_cast_fp16_to_fp32_dtype_0, x = hidden_states_887_cast_fp16)[name = string("cast_153")]; + tensor clip_242 = clip(alpha = var_4014, beta = var_4013, x = hidden_states_887_cast_fp16_to_fp32)[name = string("clip_242")]; + fp32 var_4022_promoted_1 = const()[name = string("op_4022_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4070 = pow(x = clip_242, y = var_4022_promoted_1)[name = string("op_4070")]; + tensor var_4072_axes_0 = const()[name = string("op_4072_axes_0"), val = tensor([-1])]; + bool var_4072_keep_dims_0 = const()[name = string("op_4072_keep_dims_0"), val = bool(true)]; + tensor var_4072 = reduce_mean(axes = var_4072_axes_0, keep_dims = var_4072_keep_dims_0, x = var_4070)[name = string("op_4072")]; + string var_4072_to_fp16_dtype_0 = const()[name = string("op_4072_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4073_to_fp16 = const()[name = string("op_4073_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4072_to_fp16 = cast(dtype = var_4072_to_fp16_dtype_0, x = var_4072)[name = string("cast_152")]; + tensor mean_squared_155_cast_fp16 = add(x = var_4072_to_fp16, y = var_4073_to_fp16)[name = string("mean_squared_155_cast_fp16")]; + string mean_squared_155_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_155_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_155_cast_fp16_to_fp32 = cast(dtype = mean_squared_155_cast_fp16_to_fp32_dtype_0, x = mean_squared_155_cast_fp16)[name = string("cast_151")]; + tensor var_4075 = 
pow(x = mean_squared_155_cast_fp16_to_fp32, y = var_4026)[name = string("op_4075")]; + string clip_242_to_fp16_dtype_0 = const()[name = string("clip_242_to_fp16_dtype_0"), val = string("fp16")]; + string var_4075_to_fp16_dtype_0 = const()[name = string("op_4075_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_242_to_fp16 = cast(dtype = clip_242_to_fp16_dtype_0, x = clip_242)[name = string("cast_149")]; + tensor var_4075_to_fp16 = cast(dtype = var_4075_to_fp16_dtype_0, x = var_4075)[name = string("cast_150")]; + tensor normed_output_309_cast_fp16 = mul(x = clip_242_to_fp16, y = var_4075_to_fp16)[name = string("normed_output_309_cast_fp16")]; + tensor const_133_to_fp16 = const()[name = string("const_133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104972416)))]; + tensor normed_output_311_cast_fp16 = mul(x = normed_output_309_cast_fp16, y = const_133_to_fp16)[name = string("normed_output_311_cast_fp16")]; + tensor hidden_states_893_cast_fp16 = silu(x = normed_output_311_cast_fp16)[name = string("hidden_states_893_cast_fp16")]; + fp16 lconv1ds_8_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_8_linear_end_input_min_to_fp16"), val = fp16(-0x1.fp+2)]; + fp16 lconv1ds_8_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_8_linear_end_input_max_to_fp16"), val = fp16(0x1.ecp+2)]; + tensor clip_243_cast_fp16 = clip(alpha = lconv1ds_8_linear_end_input_min_to_fp16, beta = lconv1ds_8_linear_end_input_max_to_fp16, x = hidden_states_893_cast_fp16)[name = string("clip_243_cast_fp16")]; + tensor lconv1ds_8_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104974528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105498880))))[name = string("lconv1ds_8_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_97_cast_fp16 = linear(bias = 
linear_0_bias_0_to_fp16, weight = lconv1ds_8_linear_end_linear_weight_to_fp16_palettized, x = clip_243_cast_fp16)[name = string("linear_97_cast_fp16")]; + fp16 lconv1ds_8_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_8_linear_end_output_min_to_fp16"), val = fp16(-0x1.c4p+2)]; + fp16 lconv1ds_8_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_8_linear_end_output_max_to_fp16"), val = fp16(0x1.cp+2)]; + tensor clip_244_cast_fp16 = clip(alpha = lconv1ds_8_linear_end_output_min_to_fp16, beta = lconv1ds_8_linear_end_output_max_to_fp16, x = linear_97_cast_fp16)[name = string("clip_244_cast_fp16")]; + tensor hidden_states_899_cast_fp16 = add(x = clip_244_cast_fp16, y = hidden_states_877_cast_fp16)[name = string("hidden_states_899_cast_fp16")]; + string hidden_states_899_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_899_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4099 = const()[name = string("op_4099"), val = fp32(-0x1p-1)]; + fp32 var_4100 = const()[name = string("op_4100"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4101 = const()[name = string("op_4101"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_899_cast_fp16_to_fp32 = cast(dtype = hidden_states_899_cast_fp16_to_fp32_dtype_0, x = hidden_states_899_cast_fp16)[name = string("cast_148")]; + tensor clip_245 = clip(alpha = var_4101, beta = var_4100, x = hidden_states_899_cast_fp16_to_fp32)[name = string("clip_245")]; + fp32 var_4095_promoted = const()[name = string("op_4095_promoted"), val = fp32(0x1p+1)]; + tensor var_4109 = pow(x = clip_245, y = var_4095_promoted)[name = string("op_4109")]; + tensor var_4111_axes_0 = const()[name = string("op_4111_axes_0"), val = tensor([-1])]; + bool var_4111_keep_dims_0 = const()[name = string("op_4111_keep_dims_0"), val = bool(true)]; + tensor var_4111 = reduce_mean(axes = var_4111_axes_0, keep_dims = var_4111_keep_dims_0, x = var_4109)[name = string("op_4111")]; + string var_4111_to_fp16_dtype_0 = 
const()[name = string("op_4111_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4112_to_fp16 = const()[name = string("op_4112_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4111_to_fp16 = cast(dtype = var_4111_to_fp16_dtype_0, x = var_4111)[name = string("cast_147")]; + tensor mean_squared_157_cast_fp16 = add(x = var_4111_to_fp16, y = var_4112_to_fp16)[name = string("mean_squared_157_cast_fp16")]; + string mean_squared_157_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_157_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_157_cast_fp16_to_fp32 = cast(dtype = mean_squared_157_cast_fp16_to_fp32_dtype_0, x = mean_squared_157_cast_fp16)[name = string("cast_146")]; + tensor var_4114 = pow(x = mean_squared_157_cast_fp16_to_fp32, y = var_4099)[name = string("op_4114")]; + string clip_245_to_fp16_dtype_0 = const()[name = string("clip_245_to_fp16_dtype_0"), val = string("fp16")]; + string var_4114_to_fp16_dtype_0 = const()[name = string("op_4114_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_245_to_fp16 = cast(dtype = clip_245_to_fp16_dtype_0, x = clip_245)[name = string("cast_144")]; + tensor var_4114_to_fp16 = cast(dtype = var_4114_to_fp16_dtype_0, x = var_4114)[name = string("cast_145")]; + tensor normed_output_313_cast_fp16 = mul(x = clip_245_to_fp16, y = var_4114_to_fp16)[name = string("normed_output_313_cast_fp16")]; + tensor const_134_to_fp16 = const()[name = string("const_134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105499968)))]; + tensor normed_output_315_cast_fp16 = mul(x = normed_output_313_cast_fp16, y = const_134_to_fp16)[name = string("normed_output_315_cast_fp16")]; + fp16 feed_forward2s_8_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.7cp+3)]; + fp16 feed_forward2s_8_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_1_input_max_to_fp16"), val 
= fp16(0x1.78p+3)]; + tensor clip_246_cast_fp16 = clip(alpha = feed_forward2s_8_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_8_ffw_layer_1_input_max_to_fp16, x = normed_output_315_cast_fp16)[name = string("clip_246_cast_fp16")]; + tensor feed_forward2s_8_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105502080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107599296))))[name = string("feed_forward2s_8_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_98_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_8_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_246_cast_fp16)[name = string("linear_98_cast_fp16")]; + fp16 feed_forward2s_8_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.cp+4)]; + fp16 feed_forward2s_8_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.bcp+4)]; + tensor clip_247_cast_fp16 = clip(alpha = feed_forward2s_8_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_8_ffw_layer_1_output_max_to_fp16, x = linear_98_cast_fp16)[name = string("clip_247_cast_fp16")]; + tensor hidden_states_909_cast_fp16 = silu(x = clip_247_cast_fp16)[name = string("hidden_states_909_cast_fp16")]; + fp16 feed_forward2s_8_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.3ap+3)]; + fp16 feed_forward2s_8_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.38p+3)]; + tensor clip_248_cast_fp16 = clip(alpha = feed_forward2s_8_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_8_ffw_layer_2_input_max_to_fp16, x = hidden_states_909_cast_fp16)[name = string("clip_248_cast_fp16")]; + tensor 
feed_forward2s_8_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107603456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109700672))))[name = string("feed_forward2s_8_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_99_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_8_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_248_cast_fp16)[name = string("linear_99_cast_fp16")]; + fp16 feed_forward2s_8_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.86p+5)]; + fp16 feed_forward2s_8_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_8_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.82p+5)]; + tensor clip_249_cast_fp16 = clip(alpha = feed_forward2s_8_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_8_ffw_layer_2_output_max_to_fp16, x = linear_99_cast_fp16)[name = string("clip_249_cast_fp16")]; + string clip_249_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_249_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_249_cast_fp16_to_fp32 = cast(dtype = clip_249_cast_fp16_to_fp32_dtype_0, x = clip_249_cast_fp16)[name = string("cast_143")]; + tensor clip_250 = clip(alpha = var_4101, beta = var_4100, x = clip_249_cast_fp16_to_fp32)[name = string("clip_250")]; + fp32 var_4095_promoted_1 = const()[name = string("op_4095_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4141 = pow(x = clip_250, y = var_4095_promoted_1)[name = string("op_4141")]; + tensor var_4143_axes_0 = const()[name = string("op_4143_axes_0"), val = tensor([-1])]; + bool var_4143_keep_dims_0 = const()[name = string("op_4143_keep_dims_0"), val = bool(true)]; + tensor var_4143 = reduce_mean(axes = var_4143_axes_0, keep_dims = var_4143_keep_dims_0, x = var_4141)[name = string("op_4143")]; + string 
var_4143_to_fp16_dtype_0 = const()[name = string("op_4143_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4144_to_fp16 = const()[name = string("op_4144_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4143_to_fp16 = cast(dtype = var_4143_to_fp16_dtype_0, x = var_4143)[name = string("cast_142")]; + tensor mean_squared_159_cast_fp16 = add(x = var_4143_to_fp16, y = var_4144_to_fp16)[name = string("mean_squared_159_cast_fp16")]; + string mean_squared_159_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_159_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_159_cast_fp16_to_fp32 = cast(dtype = mean_squared_159_cast_fp16_to_fp32_dtype_0, x = mean_squared_159_cast_fp16)[name = string("cast_141")]; + tensor var_4146 = pow(x = mean_squared_159_cast_fp16_to_fp32, y = var_4099)[name = string("op_4146")]; + string clip_250_to_fp16_dtype_0 = const()[name = string("clip_250_to_fp16_dtype_0"), val = string("fp16")]; + string var_4146_to_fp16_dtype_0 = const()[name = string("op_4146_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_250_to_fp16 = cast(dtype = clip_250_to_fp16_dtype_0, x = clip_250)[name = string("cast_139")]; + tensor var_4146_to_fp16 = cast(dtype = var_4146_to_fp16_dtype_0, x = var_4146)[name = string("cast_140")]; + tensor normed_output_317_cast_fp16 = mul(x = clip_250_to_fp16, y = var_4146_to_fp16)[name = string("normed_output_317_cast_fp16")]; + tensor const_135_to_fp16 = const()[name = string("const_135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109701760)))]; + tensor normed_output_319_cast_fp16 = mul(x = normed_output_317_cast_fp16, y = const_135_to_fp16)[name = string("normed_output_319_cast_fp16")]; + fp16 var_4091_to_fp16 = const()[name = string("op_4091_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_921_cast_fp16 = mul(x = normed_output_319_cast_fp16, y = var_4091_to_fp16)[name = string("hidden_states_921_cast_fp16")]; + tensor 
hidden_states_923_cast_fp16 = add(x = hidden_states_921_cast_fp16, y = hidden_states_899_cast_fp16)[name = string("hidden_states_923_cast_fp16")]; + fp16 var_4153_to_fp16 = const()[name = string("op_4153_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_4154_to_fp16 = const()[name = string("op_4154_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_251_cast_fp16 = clip(alpha = var_4153_to_fp16, beta = var_4154_to_fp16, x = hidden_states_923_cast_fp16)[name = string("clip_251_cast_fp16")]; + string clip_251_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_251_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4156 = const()[name = string("op_4156"), val = fp32(-0x1p-1)]; + fp32 var_4160_promoted = const()[name = string("op_4160_promoted"), val = fp32(0x1p+1)]; + tensor clip_251_cast_fp16_to_fp32 = cast(dtype = clip_251_cast_fp16_to_fp32_dtype_0, x = clip_251_cast_fp16)[name = string("cast_138")]; + tensor var_4166 = pow(x = clip_251_cast_fp16_to_fp32, y = var_4160_promoted)[name = string("op_4166")]; + tensor var_4168_axes_0 = const()[name = string("op_4168_axes_0"), val = tensor([-1])]; + bool var_4168_keep_dims_0 = const()[name = string("op_4168_keep_dims_0"), val = bool(true)]; + tensor var_4168 = reduce_mean(axes = var_4168_axes_0, keep_dims = var_4168_keep_dims_0, x = var_4166)[name = string("op_4168")]; + string var_4168_to_fp16_dtype_0 = const()[name = string("op_4168_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4169_to_fp16 = const()[name = string("op_4169_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4168_to_fp16 = cast(dtype = var_4168_to_fp16_dtype_0, x = var_4168)[name = string("cast_137")]; + tensor mean_squared_161_cast_fp16 = add(x = var_4168_to_fp16, y = var_4169_to_fp16)[name = string("mean_squared_161_cast_fp16")]; + string mean_squared_161_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_161_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_161_cast_fp16_to_fp32 = cast(dtype = 
mean_squared_161_cast_fp16_to_fp32_dtype_0, x = mean_squared_161_cast_fp16)[name = string("cast_136")]; + tensor var_4171 = pow(x = mean_squared_161_cast_fp16_to_fp32, y = var_4156)[name = string("op_4171")]; + string var_4171_to_fp16_dtype_0 = const()[name = string("op_4171_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4171_to_fp16 = cast(dtype = var_4171_to_fp16_dtype_0, x = var_4171)[name = string("cast_135")]; + tensor normed_output_321_cast_fp16 = mul(x = clip_251_cast_fp16, y = var_4171_to_fp16)[name = string("normed_output_321_cast_fp16")]; + tensor const_136_to_fp16 = const()[name = string("const_136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109703872)))]; + tensor normed_output_323_cast_fp16 = mul(x = normed_output_321_cast_fp16, y = const_136_to_fp16)[name = string("normed_output_323_cast_fp16")]; + string normed_output_323_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_323_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4184 = const()[name = string("op_4184"), val = fp32(-0x1p-1)]; + fp32 var_4185 = const()[name = string("op_4185"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4186 = const()[name = string("op_4186"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_323_cast_fp16_to_fp32 = cast(dtype = normed_output_323_cast_fp16_to_fp32_dtype_0, x = normed_output_323_cast_fp16)[name = string("cast_134")]; + tensor clip_252 = clip(alpha = var_4186, beta = var_4185, x = normed_output_323_cast_fp16_to_fp32)[name = string("clip_252")]; + fp32 var_4180_promoted = const()[name = string("op_4180_promoted"), val = fp32(0x1p+1)]; + tensor var_4194 = pow(x = clip_252, y = var_4180_promoted)[name = string("op_4194")]; + tensor var_4196_axes_0 = const()[name = string("op_4196_axes_0"), val = tensor([-1])]; + bool var_4196_keep_dims_0 = const()[name = string("op_4196_keep_dims_0"), val = bool(true)]; + tensor var_4196 = reduce_mean(axes = var_4196_axes_0, keep_dims = 
var_4196_keep_dims_0, x = var_4194)[name = string("op_4196")]; + string var_4196_to_fp16_dtype_0 = const()[name = string("op_4196_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4197_to_fp16 = const()[name = string("op_4197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4196_to_fp16 = cast(dtype = var_4196_to_fp16_dtype_0, x = var_4196)[name = string("cast_133")]; + tensor mean_squared_163_cast_fp16 = add(x = var_4196_to_fp16, y = var_4197_to_fp16)[name = string("mean_squared_163_cast_fp16")]; + string mean_squared_163_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_163_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_163_cast_fp16_to_fp32 = cast(dtype = mean_squared_163_cast_fp16_to_fp32_dtype_0, x = mean_squared_163_cast_fp16)[name = string("cast_132")]; + tensor var_4199 = pow(x = mean_squared_163_cast_fp16_to_fp32, y = var_4184)[name = string("op_4199")]; + string clip_252_to_fp16_dtype_0 = const()[name = string("clip_252_to_fp16_dtype_0"), val = string("fp16")]; + string var_4199_to_fp16_dtype_0 = const()[name = string("op_4199_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_252_to_fp16 = cast(dtype = clip_252_to_fp16_dtype_0, x = clip_252)[name = string("cast_130")]; + tensor var_4199_to_fp16 = cast(dtype = var_4199_to_fp16_dtype_0, x = var_4199)[name = string("cast_131")]; + tensor normed_output_325_cast_fp16 = mul(x = clip_252_to_fp16, y = var_4199_to_fp16)[name = string("normed_output_325_cast_fp16")]; + tensor const_137_to_fp16 = const()[name = string("const_137_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109705984)))]; + tensor normed_output_327_cast_fp16 = mul(x = normed_output_325_cast_fp16, y = const_137_to_fp16)[name = string("normed_output_327_cast_fp16")]; + fp16 feed_forward1s_9_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.f4p+2)]; + fp16 
feed_forward1s_9_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.fp+2)]; + tensor clip_253_cast_fp16 = clip(alpha = feed_forward1s_9_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_9_ffw_layer_1_input_max_to_fp16, x = normed_output_327_cast_fp16)[name = string("clip_253_cast_fp16")]; + tensor feed_forward1s_9_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109708096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111805312))))[name = string("feed_forward1s_9_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_9_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_253_cast_fp16)[name = string("linear_100_cast_fp16")]; + fp16 feed_forward1s_9_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.c8p+3)]; + fp16 feed_forward1s_9_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.c4p+3)]; + tensor clip_254_cast_fp16 = clip(alpha = feed_forward1s_9_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_9_ffw_layer_1_output_max_to_fp16, x = linear_100_cast_fp16)[name = string("clip_254_cast_fp16")]; + tensor hidden_states_939_cast_fp16 = silu(x = clip_254_cast_fp16)[name = string("hidden_states_939_cast_fp16")]; + fp16 feed_forward1s_9_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.08p+3)]; + fp16 feed_forward1s_9_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.06p+3)]; + tensor clip_255_cast_fp16 = clip(alpha = feed_forward1s_9_ffw_layer_2_input_min_to_fp16, beta = 
feed_forward1s_9_ffw_layer_2_input_max_to_fp16, x = hidden_states_939_cast_fp16)[name = string("clip_255_cast_fp16")]; + tensor feed_forward1s_9_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111809472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113906688))))[name = string("feed_forward1s_9_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_101_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_9_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_255_cast_fp16)[name = string("linear_101_cast_fp16")]; + fp16 feed_forward1s_9_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.4p+5)]; + fp16 feed_forward1s_9_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_9_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.3ep+5)]; + tensor clip_256_cast_fp16 = clip(alpha = feed_forward1s_9_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_9_ffw_layer_2_output_max_to_fp16, x = linear_101_cast_fp16)[name = string("clip_256_cast_fp16")]; + string clip_256_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_256_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_256_cast_fp16_to_fp32 = cast(dtype = clip_256_cast_fp16_to_fp32_dtype_0, x = clip_256_cast_fp16)[name = string("cast_129")]; + tensor clip_257 = clip(alpha = var_4186, beta = var_4185, x = clip_256_cast_fp16_to_fp32)[name = string("clip_257")]; + fp32 var_4180_promoted_1 = const()[name = string("op_4180_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4226 = pow(x = clip_257, y = var_4180_promoted_1)[name = string("op_4226")]; + tensor var_4228_axes_0 = const()[name = string("op_4228_axes_0"), val = tensor([-1])]; + bool var_4228_keep_dims_0 = const()[name = string("op_4228_keep_dims_0"), val = bool(true)]; + tensor 
var_4228 = reduce_mean(axes = var_4228_axes_0, keep_dims = var_4228_keep_dims_0, x = var_4226)[name = string("op_4228")]; + string var_4228_to_fp16_dtype_0 = const()[name = string("op_4228_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4229_to_fp16 = const()[name = string("op_4229_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4228_to_fp16 = cast(dtype = var_4228_to_fp16_dtype_0, x = var_4228)[name = string("cast_128")]; + tensor mean_squared_165_cast_fp16 = add(x = var_4228_to_fp16, y = var_4229_to_fp16)[name = string("mean_squared_165_cast_fp16")]; + string mean_squared_165_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_165_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_165_cast_fp16_to_fp32 = cast(dtype = mean_squared_165_cast_fp16_to_fp32_dtype_0, x = mean_squared_165_cast_fp16)[name = string("cast_127")]; + tensor var_4231 = pow(x = mean_squared_165_cast_fp16_to_fp32, y = var_4184)[name = string("op_4231")]; + string clip_257_to_fp16_dtype_0 = const()[name = string("clip_257_to_fp16_dtype_0"), val = string("fp16")]; + string var_4231_to_fp16_dtype_0 = const()[name = string("op_4231_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_257_to_fp16 = cast(dtype = clip_257_to_fp16_dtype_0, x = clip_257)[name = string("cast_125")]; + tensor var_4231_to_fp16 = cast(dtype = var_4231_to_fp16_dtype_0, x = var_4231)[name = string("cast_126")]; + tensor normed_output_329_cast_fp16 = mul(x = clip_257_to_fp16, y = var_4231_to_fp16)[name = string("normed_output_329_cast_fp16")]; + tensor const_138_to_fp16 = const()[name = string("const_138_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113907776)))]; + tensor normed_output_331_cast_fp16 = mul(x = normed_output_329_cast_fp16, y = const_138_to_fp16)[name = string("normed_output_331_cast_fp16")]; + fp16 var_4176_to_fp16 = const()[name = string("op_4176_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_951_cast_fp16 = 
mul(x = normed_output_331_cast_fp16, y = var_4176_to_fp16)[name = string("hidden_states_951_cast_fp16")]; + tensor hidden_states_953_cast_fp16 = add(x = hidden_states_951_cast_fp16, y = normed_output_323_cast_fp16)[name = string("hidden_states_953_cast_fp16")]; + fp16 var_4238_to_fp16 = const()[name = string("op_4238_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_4239_to_fp16 = const()[name = string("op_4239_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_258_cast_fp16 = clip(alpha = var_4238_to_fp16, beta = var_4239_to_fp16, x = hidden_states_953_cast_fp16)[name = string("clip_258_cast_fp16")]; + string clip_258_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_258_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4241 = const()[name = string("op_4241"), val = fp32(-0x1p-1)]; + fp32 var_4245_promoted = const()[name = string("op_4245_promoted"), val = fp32(0x1p+1)]; + tensor clip_258_cast_fp16_to_fp32 = cast(dtype = clip_258_cast_fp16_to_fp32_dtype_0, x = clip_258_cast_fp16)[name = string("cast_124")]; + tensor var_4251 = pow(x = clip_258_cast_fp16_to_fp32, y = var_4245_promoted)[name = string("op_4251")]; + tensor var_4253_axes_0 = const()[name = string("op_4253_axes_0"), val = tensor([-1])]; + bool var_4253_keep_dims_0 = const()[name = string("op_4253_keep_dims_0"), val = bool(true)]; + tensor var_4253 = reduce_mean(axes = var_4253_axes_0, keep_dims = var_4253_keep_dims_0, x = var_4251)[name = string("op_4253")]; + string var_4253_to_fp16_dtype_0 = const()[name = string("op_4253_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4254_to_fp16 = const()[name = string("op_4254_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4253_to_fp16 = cast(dtype = var_4253_to_fp16_dtype_0, x = var_4253)[name = string("cast_123")]; + tensor mean_squared_167_cast_fp16 = add(x = var_4253_to_fp16, y = var_4254_to_fp16)[name = string("mean_squared_167_cast_fp16")]; + string mean_squared_167_cast_fp16_to_fp32_dtype_0 = const()[name = 
string("mean_squared_167_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_167_cast_fp16_to_fp32 = cast(dtype = mean_squared_167_cast_fp16_to_fp32_dtype_0, x = mean_squared_167_cast_fp16)[name = string("cast_122")]; + tensor var_4256 = pow(x = mean_squared_167_cast_fp16_to_fp32, y = var_4241)[name = string("op_4256")]; + string var_4256_to_fp16_dtype_0 = const()[name = string("op_4256_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4256_to_fp16 = cast(dtype = var_4256_to_fp16_dtype_0, x = var_4256)[name = string("cast_121")]; + tensor normed_output_333_cast_fp16 = mul(x = clip_258_cast_fp16, y = var_4256_to_fp16)[name = string("normed_output_333_cast_fp16")]; + tensor const_139_to_fp16 = const()[name = string("const_139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113909888)))]; + tensor normed_output_335_cast_fp16 = mul(x = normed_output_333_cast_fp16, y = const_139_to_fp16)[name = string("normed_output_335_cast_fp16")]; + int32 var_4262 = const()[name = string("op_4262"), val = int32(-1)]; + fp32 var_4263 = const()[name = string("op_4263"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_9_q_proj_input_min_to_fp16 = const()[name = string("self_attns_9_q_proj_input_min_to_fp16"), val = fp16(-0x1.24p+3)]; + fp16 self_attns_9_q_proj_input_max_to_fp16 = const()[name = string("self_attns_9_q_proj_input_max_to_fp16"), val = fp16(0x1.22p+3)]; + tensor clip_259_cast_fp16 = clip(alpha = self_attns_9_q_proj_input_min_to_fp16, beta = self_attns_9_q_proj_input_max_to_fp16, x = normed_output_335_cast_fp16)[name = string("clip_259_cast_fp16")]; + tensor self_attns_9_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113912000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114436352))))[name = 
string("self_attns_9_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_102_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_9_q_proj_linear_weight_to_fp16_palettized, x = clip_259_cast_fp16)[name = string("linear_102_cast_fp16")]; + fp16 self_attns_9_q_proj_output_min_to_fp16 = const()[name = string("self_attns_9_q_proj_output_min_to_fp16"), val = fp16(-0x1.06p+4)]; + fp16 self_attns_9_q_proj_output_max_to_fp16 = const()[name = string("self_attns_9_q_proj_output_max_to_fp16"), val = fp16(0x1.04p+4)]; + tensor clip_260_cast_fp16 = clip(alpha = self_attns_9_q_proj_output_min_to_fp16, beta = self_attns_9_q_proj_output_max_to_fp16, x = linear_102_cast_fp16)[name = string("clip_260_cast_fp16")]; + tensor var_4307 = const()[name = string("op_4307"), val = tensor([1, 50, 8, 128])]; + tensor q_19_cast_fp16 = reshape(shape = var_4307, x = clip_260_cast_fp16)[name = string("q_19_cast_fp16")]; + tensor self_attns_9_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114437440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114961792))))[name = string("self_attns_9_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_103_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_9_k_proj_linear_weight_to_fp16_palettized, x = clip_259_cast_fp16)[name = string("linear_103_cast_fp16")]; + fp16 self_attns_9_k_proj_output_min_to_fp16 = const()[name = string("self_attns_9_k_proj_output_min_to_fp16"), val = fp16(-0x1.06p+4)]; + fp16 self_attns_9_k_proj_output_max_to_fp16 = const()[name = string("self_attns_9_k_proj_output_max_to_fp16"), val = fp16(0x1.04p+4)]; + tensor clip_262_cast_fp16 = clip(alpha = self_attns_9_k_proj_output_min_to_fp16, beta = self_attns_9_k_proj_output_max_to_fp16, x = linear_103_cast_fp16)[name = string("clip_262_cast_fp16")]; + tensor var_4319 = const()[name = 
string("op_4319"), val = tensor([1, 50, 8, 128])]; + tensor k_19_cast_fp16 = reshape(shape = var_4319, x = clip_262_cast_fp16)[name = string("k_19_cast_fp16")]; + tensor self_attns_9_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114962880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115487232))))[name = string("self_attns_9_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_104_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_9_v_proj_linear_weight_to_fp16_palettized, x = clip_259_cast_fp16)[name = string("linear_104_cast_fp16")]; + fp16 self_attns_9_v_proj_output_min_to_fp16 = const()[name = string("self_attns_9_v_proj_output_min_to_fp16"), val = fp16(-0x1.06p+4)]; + fp16 self_attns_9_v_proj_output_max_to_fp16 = const()[name = string("self_attns_9_v_proj_output_max_to_fp16"), val = fp16(0x1.04p+4)]; + tensor clip_264_cast_fp16 = clip(alpha = self_attns_9_v_proj_output_min_to_fp16, beta = self_attns_9_v_proj_output_max_to_fp16, x = linear_104_cast_fp16)[name = string("clip_264_cast_fp16")]; + tensor var_4331 = const()[name = string("op_4331"), val = tensor([1, 50, 8, 128])]; + tensor input_411_cast_fp16 = reshape(shape = var_4331, x = clip_264_cast_fp16)[name = string("input_411_cast_fp16")]; + fp16 var_4333_to_fp16 = const()[name = string("op_4333_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_4334_cast_fp16 = mul(x = q_19_cast_fp16, y = var_4333_to_fp16)[name = string("op_4334_cast_fp16")]; + tensor var_4335_to_fp16 = const()[name = string("op_4335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115488320)))]; + tensor input_407_cast_fp16 = mul(x = var_4334_cast_fp16, y = var_4335_to_fp16)[name = string("input_407_cast_fp16")]; + fp16 var_4337_to_fp16 = const()[name = string("op_4337_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor 
input_409_cast_fp16 = mul(x = k_19_cast_fp16, y = var_4337_to_fp16)[name = string("input_409_cast_fp16")]; + tensor q_padded_19_pad_0 = const()[name = string("q_padded_19_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_19_mode_0 = const()[name = string("q_padded_19_mode_0"), val = string("constant")]; + fp16 const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_19_cast_fp16 = pad(constant_val = const_140_to_fp16, mode = q_padded_19_mode_0, pad = q_padded_19_pad_0, x = input_407_cast_fp16)[name = string("q_padded_19_cast_fp16")]; + tensor var_4341 = const()[name = string("op_4341"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_19_cast_fp16 = reshape(shape = var_4341, x = q_padded_19_cast_fp16)[name = string("q_blocks_19_cast_fp16")]; + tensor k_padded_19_pad_0 = const()[name = string("k_padded_19_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_19_mode_0 = const()[name = string("k_padded_19_mode_0"), val = string("constant")]; + fp16 const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_19_cast_fp16 = pad(constant_val = const_141_to_fp16, mode = k_padded_19_mode_0, pad = k_padded_19_pad_0, x = input_409_cast_fp16)[name = string("k_padded_19_cast_fp16")]; + tensor v_padded_19_pad_0 = const()[name = string("v_padded_19_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_19_mode_0 = const()[name = string("v_padded_19_mode_0"), val = string("constant")]; + fp16 const_142_to_fp16 = const()[name = string("const_142_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_19_cast_fp16 = pad(constant_val = const_142_to_fp16, mode = v_padded_19_mode_0, pad = v_padded_19_pad_0, x = input_411_cast_fp16)[name = string("v_padded_19_cast_fp16")]; + tensor var_4348_begin_0 = const()[name = string("op_4348_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4348_end_0 = const()[name = string("op_4348_end_0"), val = 
tensor([1, 24, 8, 128])]; + tensor var_4348_end_mask_0 = const()[name = string("op_4348_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4348_cast_fp16 = slice_by_index(begin = var_4348_begin_0, end = var_4348_end_0, end_mask = var_4348_end_mask_0, x = k_padded_19_cast_fp16)[name = string("op_4348_cast_fp16")]; + tensor var_4350_begin_0 = const()[name = string("op_4350_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_4350_end_0 = const()[name = string("op_4350_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_4350_end_mask_0 = const()[name = string("op_4350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4350_cast_fp16 = slice_by_index(begin = var_4350_begin_0, end = var_4350_end_0, end_mask = var_4350_end_mask_0, x = k_padded_19_cast_fp16)[name = string("op_4350_cast_fp16")]; + tensor var_4352_begin_0 = const()[name = string("op_4352_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_4352_end_0 = const()[name = string("op_4352_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_4352_end_mask_0 = const()[name = string("op_4352_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4352_cast_fp16 = slice_by_index(begin = var_4352_begin_0, end = var_4352_end_0, end_mask = var_4352_end_mask_0, x = k_padded_19_cast_fp16)[name = string("op_4352_cast_fp16")]; + tensor var_4354_begin_0 = const()[name = string("op_4354_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_4354_end_0 = const()[name = string("op_4354_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_4354_end_mask_0 = const()[name = string("op_4354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4354_cast_fp16 = slice_by_index(begin = var_4354_begin_0, end = var_4354_end_0, end_mask = var_4354_end_mask_0, x = k_padded_19_cast_fp16)[name = string("op_4354_cast_fp16")]; + tensor var_4356_begin_0 = const()[name = string("op_4356_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_4356_end_0 = const()[name = 
string("op_4356_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_4356_end_mask_0 = const()[name = string("op_4356_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4356_cast_fp16 = slice_by_index(begin = var_4356_begin_0, end = var_4356_end_0, end_mask = var_4356_end_mask_0, x = k_padded_19_cast_fp16)[name = string("op_4356_cast_fp16")]; + int32 k_blocks_19_axis_0 = const()[name = string("k_blocks_19_axis_0"), val = int32(1)]; + tensor k_blocks_19_cast_fp16 = stack(axis = k_blocks_19_axis_0, values = (var_4348_cast_fp16, var_4350_cast_fp16, var_4352_cast_fp16, var_4354_cast_fp16, var_4356_cast_fp16))[name = string("k_blocks_19_cast_fp16")]; + tensor var_4360_begin_0 = const()[name = string("op_4360_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4360_end_0 = const()[name = string("op_4360_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_4360_end_mask_0 = const()[name = string("op_4360_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4360_cast_fp16 = slice_by_index(begin = var_4360_begin_0, end = var_4360_end_0, end_mask = var_4360_end_mask_0, x = v_padded_19_cast_fp16)[name = string("op_4360_cast_fp16")]; + tensor var_4362_begin_0 = const()[name = string("op_4362_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_4362_end_0 = const()[name = string("op_4362_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_4362_end_mask_0 = const()[name = string("op_4362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4362_cast_fp16 = slice_by_index(begin = var_4362_begin_0, end = var_4362_end_0, end_mask = var_4362_end_mask_0, x = v_padded_19_cast_fp16)[name = string("op_4362_cast_fp16")]; + tensor var_4364_begin_0 = const()[name = string("op_4364_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_4364_end_0 = const()[name = string("op_4364_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_4364_end_mask_0 = const()[name = string("op_4364_end_mask_0"), val = tensor([true, false, true, true])]; + 
tensor var_4364_cast_fp16 = slice_by_index(begin = var_4364_begin_0, end = var_4364_end_0, end_mask = var_4364_end_mask_0, x = v_padded_19_cast_fp16)[name = string("op_4364_cast_fp16")]; + tensor var_4366_begin_0 = const()[name = string("op_4366_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_4366_end_0 = const()[name = string("op_4366_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_4366_end_mask_0 = const()[name = string("op_4366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4366_cast_fp16 = slice_by_index(begin = var_4366_begin_0, end = var_4366_end_0, end_mask = var_4366_end_mask_0, x = v_padded_19_cast_fp16)[name = string("op_4366_cast_fp16")]; + tensor var_4368_begin_0 = const()[name = string("op_4368_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_4368_end_0 = const()[name = string("op_4368_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_4368_end_mask_0 = const()[name = string("op_4368_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4368_cast_fp16 = slice_by_index(begin = var_4368_begin_0, end = var_4368_end_0, end_mask = var_4368_end_mask_0, x = v_padded_19_cast_fp16)[name = string("op_4368_cast_fp16")]; + int32 v_blocks_19_axis_0 = const()[name = string("v_blocks_19_axis_0"), val = int32(1)]; + tensor v_blocks_19_cast_fp16 = stack(axis = v_blocks_19_axis_0, values = (var_4360_cast_fp16, var_4362_cast_fp16, var_4364_cast_fp16, var_4366_cast_fp16, var_4368_cast_fp16))[name = string("v_blocks_19_cast_fp16")]; + tensor var_4376 = const()[name = string("op_4376"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_4378 = const()[name = string("op_4378"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_19_transpose_x_0 = const()[name = string("matrix_ac_19_transpose_x_0"), val = bool(false)]; + bool matrix_ac_19_transpose_y_0 = const()[name = string("matrix_ac_19_transpose_y_0"), val = bool(false)]; + tensor queries_19_cast_fp16 = transpose(perm = var_4376, x = q_blocks_19_cast_fp16)[name = 
string("transpose_16")]; + tensor keys_t_19_cast_fp16 = transpose(perm = var_4378, x = k_blocks_19_cast_fp16)[name = string("transpose_17")]; + tensor matrix_ac_19_cast_fp16 = matmul(transpose_x = matrix_ac_19_transpose_x_0, transpose_y = matrix_ac_19_transpose_y_0, x = queries_19_cast_fp16, y = keys_t_19_cast_fp16)[name = string("matrix_ac_19_cast_fp16")]; + tensor var_4381 = const()[name = string("op_4381"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_19_cast_fp16 = reshape(shape = var_4381, x = queries_19_cast_fp16)[name = string("q_flat_19_cast_fp16")]; + bool matrix_bd_91_transpose_x_0 = const()[name = string("matrix_bd_91_transpose_x_0"), val = bool(false)]; + bool matrix_bd_91_transpose_y_0 = const()[name = string("matrix_bd_91_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_19_to_fp16 = const()[name = string("rel_k_t_19_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115488640)))]; + tensor matrix_bd_91_cast_fp16 = matmul(transpose_x = matrix_bd_91_transpose_x_0, transpose_y = matrix_bd_91_transpose_y_0, x = q_flat_19_cast_fp16, y = rel_k_t_19_to_fp16)[name = string("matrix_bd_91_cast_fp16")]; + tensor var_4386 = const()[name = string("op_4386"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_413_cast_fp16 = reshape(shape = var_4386, x = matrix_bd_91_cast_fp16)[name = string("input_413_cast_fp16")]; + tensor matrix_bd_93_pad_0 = const()[name = string("matrix_bd_93_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115515328)))]; + string matrix_bd_93_mode_0 = const()[name = string("matrix_bd_93_mode_0"), val = string("constant")]; + fp16 const_144_to_fp16 = const()[name = string("const_144_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_93_cast_fp16 = pad(constant_val = const_144_to_fp16, mode = matrix_bd_93_mode_0, pad = matrix_bd_93_pad_0, x = input_413_cast_fp16)[name = string("matrix_bd_93_cast_fp16")]; + tensor var_4390 = const()[name = 
string("op_4390"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_95_cast_fp16 = reshape(shape = var_4390, x = matrix_bd_93_cast_fp16)[name = string("matrix_bd_95_cast_fp16")]; + tensor matrix_bd_97_begin_0 = const()[name = string("matrix_bd_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_97_end_0 = const()[name = string("matrix_bd_97_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_97_end_mask_0 = const()[name = string("matrix_bd_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_97_cast_fp16 = slice_by_index(begin = matrix_bd_97_begin_0, end = matrix_bd_97_end_0, end_mask = matrix_bd_97_end_mask_0, x = matrix_bd_95_cast_fp16)[name = string("matrix_bd_97_cast_fp16")]; + tensor var_4396 = const()[name = string("op_4396"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_99_cast_fp16 = reshape(shape = var_4396, x = matrix_bd_97_cast_fp16)[name = string("matrix_bd_99_cast_fp16")]; + tensor attn_55_cast_fp16 = add(x = matrix_ac_19_cast_fp16, y = matrix_bd_99_cast_fp16)[name = string("attn_55_cast_fp16")]; + fp16 _inversed_4399_y_0_to_fp16 = const()[name = string("_inversed_4399_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_4399_cast_fp16 = mul(x = attn_55_cast_fp16, y = _inversed_4399_y_0_to_fp16)[name = string("_inversed_4399_cast_fp16")]; + string _inversed_4399_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_4399_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_4399_cast_fp16_to_fp32 = cast(dtype = _inversed_4399_cast_fp16_to_fp32_dtype_0, x = _inversed_4399_cast_fp16)[name = string("cast_120")]; + tensor var_4400 = tanh(x = _inversed_4399_cast_fp16_to_fp32)[name = string("op_4400")]; + string var_4400_to_fp16_dtype_0 = const()[name = string("op_4400_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_9_softcap_to_fp16 = const()[name = string("self_attns_9_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_4400_to_fp16 = cast(dtype = 
var_4400_to_fp16_dtype_0, x = var_4400)[name = string("cast_119")]; + tensor attn_57_cast_fp16 = mul(x = var_4400_to_fp16, y = self_attns_9_softcap_to_fp16)[name = string("attn_57_cast_fp16")]; + string attn_57_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_57_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_57_cast_fp16_to_fp32 = cast(dtype = attn_57_cast_fp16_to_fp32_dtype_0, x = attn_57_cast_fp16)[name = string("cast_118")]; + tensor input_415 = select(a = var_4263, b = attn_57_cast_fp16_to_fp32, cond = var_460)[name = string("input_415")]; + tensor var_4404 = softmax(axis = var_4262, x = input_415)[name = string("op_4404")]; + tensor var_4406 = const()[name = string("op_4406"), val = tensor([0, 3, 1, -3, -1])]; + bool out_55_transpose_x_0 = const()[name = string("out_55_transpose_x_0"), val = bool(false)]; + bool out_55_transpose_y_0 = const()[name = string("out_55_transpose_y_0"), val = bool(false)]; + string var_4404_to_fp16_dtype_0 = const()[name = string("op_4404_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_19_cast_fp16 = transpose(perm = var_4406, x = v_blocks_19_cast_fp16)[name = string("transpose_15")]; + tensor var_4404_to_fp16 = cast(dtype = var_4404_to_fp16_dtype_0, x = var_4404)[name = string("cast_117")]; + tensor out_55_cast_fp16 = matmul(transpose_x = out_55_transpose_x_0, transpose_y = out_55_transpose_y_0, x = var_4404_to_fp16, y = values_t_19_cast_fp16)[name = string("out_55_cast_fp16")]; + tensor var_4409 = const()[name = string("op_4409"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_4411 = const()[name = string("op_4411"), val = tensor([1, 60, 1024])]; + tensor var_4410_cast_fp16 = transpose(perm = var_4409, x = out_55_cast_fp16)[name = string("transpose_14")]; + tensor out_57_cast_fp16 = reshape(shape = var_4411, x = var_4410_cast_fp16)[name = string("out_57_cast_fp16")]; + tensor var_4414_begin_0 = const()[name = string("op_4414_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4414_end_0 = 
const()[name = string("op_4414_end_0"), val = tensor([1, 50, 1024])]; + tensor var_4414_end_mask_0 = const()[name = string("op_4414_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4414_cast_fp16 = slice_by_index(begin = var_4414_begin_0, end = var_4414_end_0, end_mask = var_4414_end_mask_0, x = out_57_cast_fp16)[name = string("op_4414_cast_fp16")]; + fp16 self_attns_9_post_input_min_to_fp16 = const()[name = string("self_attns_9_post_input_min_to_fp16"), val = fp16(-0x1.fap+3)]; + fp16 self_attns_9_post_input_max_to_fp16 = const()[name = string("self_attns_9_post_input_max_to_fp16"), val = fp16(0x1.f6p+3)]; + tensor clip_265_cast_fp16 = clip(alpha = self_attns_9_post_input_min_to_fp16, beta = self_attns_9_post_input_max_to_fp16, x = var_4414_cast_fp16)[name = string("clip_265_cast_fp16")]; + tensor self_attns_9_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115515456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116039808))))[name = string("self_attns_9_post_linear_weight_to_fp16_palettized")]; + tensor linear_106_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_9_post_linear_weight_to_fp16_palettized, x = clip_265_cast_fp16)[name = string("linear_106_cast_fp16")]; + fp16 self_attns_9_post_output_min_to_fp16 = const()[name = string("self_attns_9_post_output_min_to_fp16"), val = fp16(-0x1.b8p+5)]; + fp16 self_attns_9_post_output_max_to_fp16 = const()[name = string("self_attns_9_post_output_max_to_fp16"), val = fp16(0x1.b4p+5)]; + tensor clip_266_cast_fp16 = clip(alpha = self_attns_9_post_output_min_to_fp16, beta = self_attns_9_post_output_max_to_fp16, x = linear_106_cast_fp16)[name = string("clip_266_cast_fp16")]; + fp16 var_4426_to_fp16 = const()[name = string("op_4426_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_4427_to_fp16 = const()[name = string("op_4427_to_fp16"), val = 
fp16(0x1.ffcp+15)]; + tensor clip_267_cast_fp16 = clip(alpha = var_4426_to_fp16, beta = var_4427_to_fp16, x = clip_266_cast_fp16)[name = string("clip_267_cast_fp16")]; + string clip_267_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_267_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4429 = const()[name = string("op_4429"), val = fp32(-0x1p-1)]; + fp32 var_4433_promoted = const()[name = string("op_4433_promoted"), val = fp32(0x1p+1)]; + tensor clip_267_cast_fp16_to_fp32 = cast(dtype = clip_267_cast_fp16_to_fp32_dtype_0, x = clip_267_cast_fp16)[name = string("cast_116")]; + tensor var_4439 = pow(x = clip_267_cast_fp16_to_fp32, y = var_4433_promoted)[name = string("op_4439")]; + tensor var_4441_axes_0 = const()[name = string("op_4441_axes_0"), val = tensor([-1])]; + bool var_4441_keep_dims_0 = const()[name = string("op_4441_keep_dims_0"), val = bool(true)]; + tensor var_4441 = reduce_mean(axes = var_4441_axes_0, keep_dims = var_4441_keep_dims_0, x = var_4439)[name = string("op_4441")]; + string var_4441_to_fp16_dtype_0 = const()[name = string("op_4441_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4442_to_fp16 = const()[name = string("op_4442_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4441_to_fp16 = cast(dtype = var_4441_to_fp16_dtype_0, x = var_4441)[name = string("cast_115")]; + tensor mean_squared_169_cast_fp16 = add(x = var_4441_to_fp16, y = var_4442_to_fp16)[name = string("mean_squared_169_cast_fp16")]; + string mean_squared_169_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_169_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_169_cast_fp16_to_fp32 = cast(dtype = mean_squared_169_cast_fp16_to_fp32_dtype_0, x = mean_squared_169_cast_fp16)[name = string("cast_114")]; + tensor var_4444 = pow(x = mean_squared_169_cast_fp16_to_fp32, y = var_4429)[name = string("op_4444")]; + string var_4444_to_fp16_dtype_0 = const()[name = string("op_4444_to_fp16_dtype_0"), val = string("fp16")]; + tensor 
var_4444_to_fp16 = cast(dtype = var_4444_to_fp16_dtype_0, x = var_4444)[name = string("cast_113")]; + tensor normed_output_337_cast_fp16 = mul(x = clip_267_cast_fp16, y = var_4444_to_fp16)[name = string("normed_output_337_cast_fp16")]; + tensor const_145_to_fp16 = const()[name = string("const_145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116040896)))]; + tensor normed_output_339_cast_fp16 = mul(x = normed_output_337_cast_fp16, y = const_145_to_fp16)[name = string("normed_output_339_cast_fp16")]; + tensor hidden_states_979_cast_fp16 = add(x = normed_output_339_cast_fp16, y = hidden_states_953_cast_fp16)[name = string("hidden_states_979_cast_fp16")]; + string hidden_states_979_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_979_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4451 = const()[name = string("op_4451"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4452 = const()[name = string("op_4452"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_4464 = const()[name = string("op_4464"), val = fp32(-0x1p-1)]; + fp32 var_4460_promoted = const()[name = string("op_4460_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_979_cast_fp16_to_fp32 = cast(dtype = hidden_states_979_cast_fp16_to_fp32_dtype_0, x = hidden_states_979_cast_fp16)[name = string("cast_112")]; + tensor var_4472 = pow(x = hidden_states_979_cast_fp16_to_fp32, y = var_4460_promoted)[name = string("op_4472")]; + tensor var_4474_axes_0 = const()[name = string("op_4474_axes_0"), val = tensor([-1])]; + bool var_4474_keep_dims_0 = const()[name = string("op_4474_keep_dims_0"), val = bool(true)]; + tensor var_4474 = reduce_mean(axes = var_4474_axes_0, keep_dims = var_4474_keep_dims_0, x = var_4472)[name = string("op_4474")]; + string var_4474_to_fp16_dtype_0 = const()[name = string("op_4474_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4475_to_fp16 = const()[name = string("op_4475_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
var_4474_to_fp16 = cast(dtype = var_4474_to_fp16_dtype_0, x = var_4474)[name = string("cast_111")]; + tensor mean_squared_171_cast_fp16 = add(x = var_4474_to_fp16, y = var_4475_to_fp16)[name = string("mean_squared_171_cast_fp16")]; + string mean_squared_171_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_171_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_171_cast_fp16_to_fp32 = cast(dtype = mean_squared_171_cast_fp16_to_fp32_dtype_0, x = mean_squared_171_cast_fp16)[name = string("cast_110")]; + tensor var_4477 = pow(x = mean_squared_171_cast_fp16_to_fp32, y = var_4464)[name = string("op_4477")]; + string var_4477_to_fp16_dtype_0 = const()[name = string("op_4477_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4477_to_fp16 = cast(dtype = var_4477_to_fp16_dtype_0, x = var_4477)[name = string("cast_109")]; + tensor normed_output_341_cast_fp16 = mul(x = hidden_states_979_cast_fp16, y = var_4477_to_fp16)[name = string("normed_output_341_cast_fp16")]; + tensor const_146_to_fp16 = const()[name = string("const_146_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116043008)))]; + tensor normed_output_343_cast_fp16 = mul(x = normed_output_341_cast_fp16, y = const_146_to_fp16)[name = string("normed_output_343_cast_fp16")]; + fp16 lconv1ds_9_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_9_linear_start_input_min_to_fp16"), val = fp16(-0x1.5p+3)]; + fp16 lconv1ds_9_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_9_linear_start_input_max_to_fp16"), val = fp16(0x1.4ep+3)]; + tensor clip_268_cast_fp16 = clip(alpha = lconv1ds_9_linear_start_input_min_to_fp16, beta = lconv1ds_9_linear_start_input_max_to_fp16, x = normed_output_343_cast_fp16)[name = string("clip_268_cast_fp16")]; + tensor lconv1ds_9_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(116045120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117093760))))[name = string("lconv1ds_9_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_107_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_9_linear_start_linear_weight_to_fp16_palettized, x = clip_268_cast_fp16)[name = string("linear_107_cast_fp16")]; + fp16 lconv1ds_9_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_9_linear_start_output_min_to_fp16"), val = fp16(-0x1.86p+4)]; + fp16 lconv1ds_9_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_9_linear_start_output_max_to_fp16"), val = fp16(0x1.82p+4)]; + tensor clip_269_cast_fp16 = clip(alpha = lconv1ds_9_linear_start_output_min_to_fp16, beta = lconv1ds_9_linear_start_output_max_to_fp16, x = linear_107_cast_fp16)[name = string("clip_269_cast_fp16")]; + int32 hidden_states_987_split_num_splits_0 = const()[name = string("hidden_states_987_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_987_split_axis_0 = const()[name = string("hidden_states_987_split_axis_0"), val = int32(-1)]; + tensor hidden_states_987_split_cast_fp16_0, tensor hidden_states_987_split_cast_fp16_1 = split(axis = hidden_states_987_split_axis_0, num_splits = hidden_states_987_split_num_splits_0, x = clip_269_cast_fp16)[name = string("hidden_states_987_split_cast_fp16")]; + tensor hidden_states_987_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_987_split_cast_fp16_1)[name = string("hidden_states_987_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_987_cast_fp16 = mul(x = hidden_states_987_split_cast_fp16_0, y = hidden_states_987_split_1_sigmoid_cast_fp16)[name = string("hidden_states_987_cast_fp16")]; + tensor input_423_perm_0 = const()[name = string("input_423_perm_0"), val = tensor([0, 2, 1])]; + tensor input_425_pad_0 = const()[name = string("input_425_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_425_mode_0 = const()[name = 
string("input_425_mode_0"), val = string("constant")]; + fp16 const_147_to_fp16 = const()[name = string("const_147_to_fp16"), val = fp16(0x0p+0)]; + tensor input_423_cast_fp16 = transpose(perm = input_423_perm_0, x = hidden_states_987_cast_fp16)[name = string("transpose_13")]; + tensor input_425_cast_fp16 = pad(constant_val = const_147_to_fp16, mode = input_425_mode_0, pad = input_425_pad_0, x = input_423_cast_fp16)[name = string("input_425_cast_fp16")]; + string var_4503_pad_type_0 = const()[name = string("op_4503_pad_type_0"), val = string("valid")]; + int32 var_4503_groups_0 = const()[name = string("op_4503_groups_0"), val = int32(1024)]; + tensor var_4503_strides_0 = const()[name = string("op_4503_strides_0"), val = tensor([1])]; + tensor var_4503_pad_0 = const()[name = string("op_4503_pad_0"), val = tensor([0, 0])]; + tensor var_4503_dilations_0 = const()[name = string("op_4503_dilations_0"), val = tensor([1])]; + tensor lconv1ds_9_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117095872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117098496))))[name = string("lconv1ds_9_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_4503_cast_fp16 = conv(dilations = var_4503_dilations_0, groups = var_4503_groups_0, pad = var_4503_pad_0, pad_type = var_4503_pad_type_0, strides = var_4503_strides_0, weight = lconv1ds_9_depthwise_conv1d_weight_to_fp16_palettized, x = input_425_cast_fp16)[name = string("op_4503_cast_fp16")]; + tensor hidden_states_989_perm_0 = const()[name = string("hidden_states_989_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_989_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_989_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_989_cast_fp16 = transpose(perm = hidden_states_989_perm_0, x = var_4503_cast_fp16)[name = 
string("transpose_12")]; + tensor hidden_states_989_cast_fp16_to_fp32 = cast(dtype = hidden_states_989_cast_fp16_to_fp32_dtype_0, x = hidden_states_989_cast_fp16)[name = string("cast_108")]; + tensor clip_270 = clip(alpha = var_4452, beta = var_4451, x = hidden_states_989_cast_fp16_to_fp32)[name = string("clip_270")]; + fp32 var_4460_promoted_1 = const()[name = string("op_4460_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4508 = pow(x = clip_270, y = var_4460_promoted_1)[name = string("op_4508")]; + tensor var_4510_axes_0 = const()[name = string("op_4510_axes_0"), val = tensor([-1])]; + bool var_4510_keep_dims_0 = const()[name = string("op_4510_keep_dims_0"), val = bool(true)]; + tensor var_4510 = reduce_mean(axes = var_4510_axes_0, keep_dims = var_4510_keep_dims_0, x = var_4508)[name = string("op_4510")]; + string var_4510_to_fp16_dtype_0 = const()[name = string("op_4510_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4511_to_fp16 = const()[name = string("op_4511_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4510_to_fp16 = cast(dtype = var_4510_to_fp16_dtype_0, x = var_4510)[name = string("cast_107")]; + tensor mean_squared_173_cast_fp16 = add(x = var_4510_to_fp16, y = var_4511_to_fp16)[name = string("mean_squared_173_cast_fp16")]; + string mean_squared_173_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_173_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_173_cast_fp16_to_fp32 = cast(dtype = mean_squared_173_cast_fp16_to_fp32_dtype_0, x = mean_squared_173_cast_fp16)[name = string("cast_106")]; + tensor var_4513 = pow(x = mean_squared_173_cast_fp16_to_fp32, y = var_4464)[name = string("op_4513")]; + string clip_270_to_fp16_dtype_0 = const()[name = string("clip_270_to_fp16_dtype_0"), val = string("fp16")]; + string var_4513_to_fp16_dtype_0 = const()[name = string("op_4513_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_270_to_fp16 = cast(dtype = clip_270_to_fp16_dtype_0, x = clip_270)[name = 
string("cast_104")]; + tensor var_4513_to_fp16 = cast(dtype = var_4513_to_fp16_dtype_0, x = var_4513)[name = string("cast_105")]; + tensor normed_output_345_cast_fp16 = mul(x = clip_270_to_fp16, y = var_4513_to_fp16)[name = string("normed_output_345_cast_fp16")]; + tensor const_148_to_fp16 = const()[name = string("const_148_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117099584)))]; + tensor normed_output_347_cast_fp16 = mul(x = normed_output_345_cast_fp16, y = const_148_to_fp16)[name = string("normed_output_347_cast_fp16")]; + tensor hidden_states_995_cast_fp16 = silu(x = normed_output_347_cast_fp16)[name = string("hidden_states_995_cast_fp16")]; + fp16 lconv1ds_9_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_9_linear_end_input_min_to_fp16"), val = fp16(-0x1.02p+3)]; + fp16 lconv1ds_9_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_9_linear_end_input_max_to_fp16"), val = fp16(0x1p+3)]; + tensor clip_271_cast_fp16 = clip(alpha = lconv1ds_9_linear_end_input_min_to_fp16, beta = lconv1ds_9_linear_end_input_max_to_fp16, x = hidden_states_995_cast_fp16)[name = string("clip_271_cast_fp16")]; + tensor lconv1ds_9_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117101696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117626048))))[name = string("lconv1ds_9_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_108_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_9_linear_end_linear_weight_to_fp16_palettized, x = clip_271_cast_fp16)[name = string("linear_108_cast_fp16")]; + fp16 lconv1ds_9_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_9_linear_end_output_min_to_fp16"), val = fp16(-0x1.02p+3)]; + fp16 lconv1ds_9_linear_end_output_max_to_fp16 = const()[name = 
string("lconv1ds_9_linear_end_output_max_to_fp16"), val = fp16(0x1p+3)]; + tensor clip_272_cast_fp16 = clip(alpha = lconv1ds_9_linear_end_output_min_to_fp16, beta = lconv1ds_9_linear_end_output_max_to_fp16, x = linear_108_cast_fp16)[name = string("clip_272_cast_fp16")]; + tensor hidden_states_1001_cast_fp16 = add(x = clip_272_cast_fp16, y = hidden_states_979_cast_fp16)[name = string("hidden_states_1001_cast_fp16")]; + string hidden_states_1001_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1001_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4537 = const()[name = string("op_4537"), val = fp32(-0x1p-1)]; + fp32 var_4538 = const()[name = string("op_4538"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4539 = const()[name = string("op_4539"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_1001_cast_fp16_to_fp32 = cast(dtype = hidden_states_1001_cast_fp16_to_fp32_dtype_0, x = hidden_states_1001_cast_fp16)[name = string("cast_103")]; + tensor clip_273 = clip(alpha = var_4539, beta = var_4538, x = hidden_states_1001_cast_fp16_to_fp32)[name = string("clip_273")]; + fp32 var_4533_promoted = const()[name = string("op_4533_promoted"), val = fp32(0x1p+1)]; + tensor var_4547 = pow(x = clip_273, y = var_4533_promoted)[name = string("op_4547")]; + tensor var_4549_axes_0 = const()[name = string("op_4549_axes_0"), val = tensor([-1])]; + bool var_4549_keep_dims_0 = const()[name = string("op_4549_keep_dims_0"), val = bool(true)]; + tensor var_4549 = reduce_mean(axes = var_4549_axes_0, keep_dims = var_4549_keep_dims_0, x = var_4547)[name = string("op_4549")]; + string var_4549_to_fp16_dtype_0 = const()[name = string("op_4549_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4550_to_fp16 = const()[name = string("op_4550_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4549_to_fp16 = cast(dtype = var_4549_to_fp16_dtype_0, x = var_4549)[name = string("cast_102")]; + tensor mean_squared_175_cast_fp16 = add(x = var_4549_to_fp16, y = 
var_4550_to_fp16)[name = string("mean_squared_175_cast_fp16")]; + string mean_squared_175_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_175_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_175_cast_fp16_to_fp32 = cast(dtype = mean_squared_175_cast_fp16_to_fp32_dtype_0, x = mean_squared_175_cast_fp16)[name = string("cast_101")]; + tensor var_4552 = pow(x = mean_squared_175_cast_fp16_to_fp32, y = var_4537)[name = string("op_4552")]; + string clip_273_to_fp16_dtype_0 = const()[name = string("clip_273_to_fp16_dtype_0"), val = string("fp16")]; + string var_4552_to_fp16_dtype_0 = const()[name = string("op_4552_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_273_to_fp16 = cast(dtype = clip_273_to_fp16_dtype_0, x = clip_273)[name = string("cast_99")]; + tensor var_4552_to_fp16 = cast(dtype = var_4552_to_fp16_dtype_0, x = var_4552)[name = string("cast_100")]; + tensor normed_output_349_cast_fp16 = mul(x = clip_273_to_fp16, y = var_4552_to_fp16)[name = string("normed_output_349_cast_fp16")]; + tensor const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117627136)))]; + tensor normed_output_351_cast_fp16 = mul(x = normed_output_349_cast_fp16, y = const_149_to_fp16)[name = string("normed_output_351_cast_fp16")]; + fp16 feed_forward2s_9_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.92p+3)]; + fp16 feed_forward2s_9_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.9p+3)]; + tensor clip_274_cast_fp16 = clip(alpha = feed_forward2s_9_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_9_ffw_layer_1_input_max_to_fp16, x = normed_output_351_cast_fp16)[name = string("clip_274_cast_fp16")]; + tensor feed_forward2s_9_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117629248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119726464))))[name = string("feed_forward2s_9_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_9_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_274_cast_fp16)[name = string("linear_109_cast_fp16")]; + fp16 feed_forward2s_9_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.e6p+4)]; + fp16 feed_forward2s_9_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.e2p+4)]; + tensor clip_275_cast_fp16 = clip(alpha = feed_forward2s_9_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_9_ffw_layer_1_output_max_to_fp16, x = linear_109_cast_fp16)[name = string("clip_275_cast_fp16")]; + tensor hidden_states_1011_cast_fp16 = silu(x = clip_275_cast_fp16)[name = string("hidden_states_1011_cast_fp16")]; + fp16 feed_forward2s_9_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.b4p+3)]; + fp16 feed_forward2s_9_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.bp+3)]; + tensor clip_276_cast_fp16 = clip(alpha = feed_forward2s_9_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_9_ffw_layer_2_input_max_to_fp16, x = hidden_states_1011_cast_fp16)[name = string("clip_276_cast_fp16")]; + tensor feed_forward2s_9_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119730624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121827840))))[name = 
string("feed_forward2s_9_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_110_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_9_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_276_cast_fp16)[name = string("linear_110_cast_fp16")]; + fp16 feed_forward2s_9_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.68p+6)]; + fp16 feed_forward2s_9_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_9_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.66p+6)]; + tensor clip_277_cast_fp16 = clip(alpha = feed_forward2s_9_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_9_ffw_layer_2_output_max_to_fp16, x = linear_110_cast_fp16)[name = string("clip_277_cast_fp16")]; + string clip_277_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_277_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_277_cast_fp16_to_fp32 = cast(dtype = clip_277_cast_fp16_to_fp32_dtype_0, x = clip_277_cast_fp16)[name = string("cast_98")]; + tensor clip_278 = clip(alpha = var_4539, beta = var_4538, x = clip_277_cast_fp16_to_fp32)[name = string("clip_278")]; + fp32 var_4533_promoted_1 = const()[name = string("op_4533_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4579 = pow(x = clip_278, y = var_4533_promoted_1)[name = string("op_4579")]; + tensor var_4581_axes_0 = const()[name = string("op_4581_axes_0"), val = tensor([-1])]; + bool var_4581_keep_dims_0 = const()[name = string("op_4581_keep_dims_0"), val = bool(true)]; + tensor var_4581 = reduce_mean(axes = var_4581_axes_0, keep_dims = var_4581_keep_dims_0, x = var_4579)[name = string("op_4581")]; + string var_4581_to_fp16_dtype_0 = const()[name = string("op_4581_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4582_to_fp16 = const()[name = string("op_4582_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4581_to_fp16 = cast(dtype = var_4581_to_fp16_dtype_0, x = var_4581)[name = 
string("cast_97")]; + tensor mean_squared_177_cast_fp16 = add(x = var_4581_to_fp16, y = var_4582_to_fp16)[name = string("mean_squared_177_cast_fp16")]; + string mean_squared_177_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_177_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_177_cast_fp16_to_fp32 = cast(dtype = mean_squared_177_cast_fp16_to_fp32_dtype_0, x = mean_squared_177_cast_fp16)[name = string("cast_96")]; + tensor var_4584 = pow(x = mean_squared_177_cast_fp16_to_fp32, y = var_4537)[name = string("op_4584")]; + string clip_278_to_fp16_dtype_0 = const()[name = string("clip_278_to_fp16_dtype_0"), val = string("fp16")]; + string var_4584_to_fp16_dtype_0 = const()[name = string("op_4584_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_278_to_fp16 = cast(dtype = clip_278_to_fp16_dtype_0, x = clip_278)[name = string("cast_94")]; + tensor var_4584_to_fp16 = cast(dtype = var_4584_to_fp16_dtype_0, x = var_4584)[name = string("cast_95")]; + tensor normed_output_353_cast_fp16 = mul(x = clip_278_to_fp16, y = var_4584_to_fp16)[name = string("normed_output_353_cast_fp16")]; + tensor const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121828928)))]; + tensor normed_output_355_cast_fp16 = mul(x = normed_output_353_cast_fp16, y = const_150_to_fp16)[name = string("normed_output_355_cast_fp16")]; + fp16 var_4529_to_fp16 = const()[name = string("op_4529_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_1023_cast_fp16 = mul(x = normed_output_355_cast_fp16, y = var_4529_to_fp16)[name = string("hidden_states_1023_cast_fp16")]; + tensor hidden_states_1025_cast_fp16 = add(x = hidden_states_1023_cast_fp16, y = hidden_states_1001_cast_fp16)[name = string("hidden_states_1025_cast_fp16")]; + fp16 var_4591_to_fp16 = const()[name = string("op_4591_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_4592_to_fp16 = const()[name = 
string("op_4592_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_279_cast_fp16 = clip(alpha = var_4591_to_fp16, beta = var_4592_to_fp16, x = hidden_states_1025_cast_fp16)[name = string("clip_279_cast_fp16")]; + string clip_279_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_279_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4594 = const()[name = string("op_4594"), val = fp32(-0x1p-1)]; + fp32 var_4598_promoted = const()[name = string("op_4598_promoted"), val = fp32(0x1p+1)]; + tensor clip_279_cast_fp16_to_fp32 = cast(dtype = clip_279_cast_fp16_to_fp32_dtype_0, x = clip_279_cast_fp16)[name = string("cast_93")]; + tensor var_4604 = pow(x = clip_279_cast_fp16_to_fp32, y = var_4598_promoted)[name = string("op_4604")]; + tensor var_4606_axes_0 = const()[name = string("op_4606_axes_0"), val = tensor([-1])]; + bool var_4606_keep_dims_0 = const()[name = string("op_4606_keep_dims_0"), val = bool(true)]; + tensor var_4606 = reduce_mean(axes = var_4606_axes_0, keep_dims = var_4606_keep_dims_0, x = var_4604)[name = string("op_4606")]; + string var_4606_to_fp16_dtype_0 = const()[name = string("op_4606_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4607_to_fp16 = const()[name = string("op_4607_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4606_to_fp16 = cast(dtype = var_4606_to_fp16_dtype_0, x = var_4606)[name = string("cast_92")]; + tensor mean_squared_179_cast_fp16 = add(x = var_4606_to_fp16, y = var_4607_to_fp16)[name = string("mean_squared_179_cast_fp16")]; + string mean_squared_179_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_179_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_179_cast_fp16_to_fp32 = cast(dtype = mean_squared_179_cast_fp16_to_fp32_dtype_0, x = mean_squared_179_cast_fp16)[name = string("cast_91")]; + tensor var_4609 = pow(x = mean_squared_179_cast_fp16_to_fp32, y = var_4594)[name = string("op_4609")]; + string var_4609_to_fp16_dtype_0 = const()[name = 
string("op_4609_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4609_to_fp16 = cast(dtype = var_4609_to_fp16_dtype_0, x = var_4609)[name = string("cast_90")]; + tensor normed_output_357_cast_fp16 = mul(x = clip_279_cast_fp16, y = var_4609_to_fp16)[name = string("normed_output_357_cast_fp16")]; + tensor const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121831040)))]; + tensor normed_output_359_cast_fp16 = mul(x = normed_output_357_cast_fp16, y = const_151_to_fp16)[name = string("normed_output_359_cast_fp16")]; + string normed_output_359_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_359_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4622 = const()[name = string("op_4622"), val = fp32(-0x1p-1)]; + fp32 var_4623 = const()[name = string("op_4623"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4624 = const()[name = string("op_4624"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_359_cast_fp16_to_fp32 = cast(dtype = normed_output_359_cast_fp16_to_fp32_dtype_0, x = normed_output_359_cast_fp16)[name = string("cast_89")]; + tensor clip_280 = clip(alpha = var_4624, beta = var_4623, x = normed_output_359_cast_fp16_to_fp32)[name = string("clip_280")]; + fp32 var_4618_promoted = const()[name = string("op_4618_promoted"), val = fp32(0x1p+1)]; + tensor var_4632 = pow(x = clip_280, y = var_4618_promoted)[name = string("op_4632")]; + tensor var_4634_axes_0 = const()[name = string("op_4634_axes_0"), val = tensor([-1])]; + bool var_4634_keep_dims_0 = const()[name = string("op_4634_keep_dims_0"), val = bool(true)]; + tensor var_4634 = reduce_mean(axes = var_4634_axes_0, keep_dims = var_4634_keep_dims_0, x = var_4632)[name = string("op_4634")]; + string var_4634_to_fp16_dtype_0 = const()[name = string("op_4634_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4635_to_fp16 = const()[name = string("op_4635_to_fp16"), val = fp16(0x1.1p-20)]; + 
tensor var_4634_to_fp16 = cast(dtype = var_4634_to_fp16_dtype_0, x = var_4634)[name = string("cast_88")]; + tensor mean_squared_181_cast_fp16 = add(x = var_4634_to_fp16, y = var_4635_to_fp16)[name = string("mean_squared_181_cast_fp16")]; + string mean_squared_181_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_181_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_181_cast_fp16_to_fp32 = cast(dtype = mean_squared_181_cast_fp16_to_fp32_dtype_0, x = mean_squared_181_cast_fp16)[name = string("cast_87")]; + tensor var_4637 = pow(x = mean_squared_181_cast_fp16_to_fp32, y = var_4622)[name = string("op_4637")]; + string clip_280_to_fp16_dtype_0 = const()[name = string("clip_280_to_fp16_dtype_0"), val = string("fp16")]; + string var_4637_to_fp16_dtype_0 = const()[name = string("op_4637_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_280_to_fp16 = cast(dtype = clip_280_to_fp16_dtype_0, x = clip_280)[name = string("cast_85")]; + tensor var_4637_to_fp16 = cast(dtype = var_4637_to_fp16_dtype_0, x = var_4637)[name = string("cast_86")]; + tensor normed_output_361_cast_fp16 = mul(x = clip_280_to_fp16, y = var_4637_to_fp16)[name = string("normed_output_361_cast_fp16")]; + tensor const_152_to_fp16 = const()[name = string("const_152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121833152)))]; + tensor normed_output_363_cast_fp16 = mul(x = normed_output_361_cast_fp16, y = const_152_to_fp16)[name = string("normed_output_363_cast_fp16")]; + fp16 feed_forward1s_10_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.aap+2)]; + fp16 feed_forward1s_10_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.a6p+2)]; + tensor clip_281_cast_fp16 = clip(alpha = feed_forward1s_10_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_10_ffw_layer_1_input_max_to_fp16, 
x = normed_output_363_cast_fp16)[name = string("clip_281_cast_fp16")]; + tensor feed_forward1s_10_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121835264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123932480))))[name = string("feed_forward1s_10_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_111_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_10_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_281_cast_fp16)[name = string("linear_111_cast_fp16")]; + fp16 feed_forward1s_10_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.b4p+3)]; + fp16 feed_forward1s_10_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.b2p+3)]; + tensor clip_282_cast_fp16 = clip(alpha = feed_forward1s_10_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_10_ffw_layer_1_output_max_to_fp16, x = linear_111_cast_fp16)[name = string("clip_282_cast_fp16")]; + tensor hidden_states_1041_cast_fp16 = silu(x = clip_282_cast_fp16)[name = string("hidden_states_1041_cast_fp16")]; + fp16 feed_forward1s_10_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.1ap+3)]; + fp16 feed_forward1s_10_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.18p+3)]; + tensor clip_283_cast_fp16 = clip(alpha = feed_forward1s_10_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_10_ffw_layer_2_input_max_to_fp16, x = hidden_states_1041_cast_fp16)[name = string("clip_283_cast_fp16")]; + tensor feed_forward1s_10_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(123936640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126033856))))[name = string("feed_forward1s_10_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_10_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_283_cast_fp16)[name = string("linear_112_cast_fp16")]; + fp16 feed_forward1s_10_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.8ep+5)]; + fp16 feed_forward1s_10_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_10_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.8cp+5)]; + tensor clip_284_cast_fp16 = clip(alpha = feed_forward1s_10_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_10_ffw_layer_2_output_max_to_fp16, x = linear_112_cast_fp16)[name = string("clip_284_cast_fp16")]; + string clip_284_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_284_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_284_cast_fp16_to_fp32 = cast(dtype = clip_284_cast_fp16_to_fp32_dtype_0, x = clip_284_cast_fp16)[name = string("cast_84")]; + tensor clip_285 = clip(alpha = var_4624, beta = var_4623, x = clip_284_cast_fp16_to_fp32)[name = string("clip_285")]; + fp32 var_4618_promoted_1 = const()[name = string("op_4618_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4664 = pow(x = clip_285, y = var_4618_promoted_1)[name = string("op_4664")]; + tensor var_4666_axes_0 = const()[name = string("op_4666_axes_0"), val = tensor([-1])]; + bool var_4666_keep_dims_0 = const()[name = string("op_4666_keep_dims_0"), val = bool(true)]; + tensor var_4666 = reduce_mean(axes = var_4666_axes_0, keep_dims = var_4666_keep_dims_0, x = var_4664)[name = string("op_4666")]; + string var_4666_to_fp16_dtype_0 = const()[name = string("op_4666_to_fp16_dtype_0"), val = string("fp16")]; + fp16 
var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4666_to_fp16 = cast(dtype = var_4666_to_fp16_dtype_0, x = var_4666)[name = string("cast_83")]; + tensor mean_squared_183_cast_fp16 = add(x = var_4666_to_fp16, y = var_4667_to_fp16)[name = string("mean_squared_183_cast_fp16")]; + string mean_squared_183_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_183_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_183_cast_fp16_to_fp32 = cast(dtype = mean_squared_183_cast_fp16_to_fp32_dtype_0, x = mean_squared_183_cast_fp16)[name = string("cast_82")]; + tensor var_4669 = pow(x = mean_squared_183_cast_fp16_to_fp32, y = var_4622)[name = string("op_4669")]; + string clip_285_to_fp16_dtype_0 = const()[name = string("clip_285_to_fp16_dtype_0"), val = string("fp16")]; + string var_4669_to_fp16_dtype_0 = const()[name = string("op_4669_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_285_to_fp16 = cast(dtype = clip_285_to_fp16_dtype_0, x = clip_285)[name = string("cast_80")]; + tensor var_4669_to_fp16 = cast(dtype = var_4669_to_fp16_dtype_0, x = var_4669)[name = string("cast_81")]; + tensor normed_output_365_cast_fp16 = mul(x = clip_285_to_fp16, y = var_4669_to_fp16)[name = string("normed_output_365_cast_fp16")]; + tensor const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126034944)))]; + tensor normed_output_367_cast_fp16 = mul(x = normed_output_365_cast_fp16, y = const_153_to_fp16)[name = string("normed_output_367_cast_fp16")]; + fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_1053_cast_fp16 = mul(x = normed_output_367_cast_fp16, y = var_4614_to_fp16)[name = string("hidden_states_1053_cast_fp16")]; + tensor hidden_states_1055_cast_fp16 = add(x = hidden_states_1053_cast_fp16, y = normed_output_359_cast_fp16)[name = 
string("hidden_states_1055_cast_fp16")]; + fp16 var_4676_to_fp16 = const()[name = string("op_4676_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_4677_to_fp16 = const()[name = string("op_4677_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_286_cast_fp16 = clip(alpha = var_4676_to_fp16, beta = var_4677_to_fp16, x = hidden_states_1055_cast_fp16)[name = string("clip_286_cast_fp16")]; + string clip_286_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_286_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4679 = const()[name = string("op_4679"), val = fp32(-0x1p-1)]; + fp32 var_4683_promoted = const()[name = string("op_4683_promoted"), val = fp32(0x1p+1)]; + tensor clip_286_cast_fp16_to_fp32 = cast(dtype = clip_286_cast_fp16_to_fp32_dtype_0, x = clip_286_cast_fp16)[name = string("cast_79")]; + tensor var_4689 = pow(x = clip_286_cast_fp16_to_fp32, y = var_4683_promoted)[name = string("op_4689")]; + tensor var_4691_axes_0 = const()[name = string("op_4691_axes_0"), val = tensor([-1])]; + bool var_4691_keep_dims_0 = const()[name = string("op_4691_keep_dims_0"), val = bool(true)]; + tensor var_4691 = reduce_mean(axes = var_4691_axes_0, keep_dims = var_4691_keep_dims_0, x = var_4689)[name = string("op_4691")]; + string var_4691_to_fp16_dtype_0 = const()[name = string("op_4691_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4692_to_fp16 = const()[name = string("op_4692_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4691_to_fp16 = cast(dtype = var_4691_to_fp16_dtype_0, x = var_4691)[name = string("cast_78")]; + tensor mean_squared_185_cast_fp16 = add(x = var_4691_to_fp16, y = var_4692_to_fp16)[name = string("mean_squared_185_cast_fp16")]; + string mean_squared_185_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_185_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_185_cast_fp16_to_fp32 = cast(dtype = mean_squared_185_cast_fp16_to_fp32_dtype_0, x = mean_squared_185_cast_fp16)[name = string("cast_77")]; + tensor 
var_4694 = pow(x = mean_squared_185_cast_fp16_to_fp32, y = var_4679)[name = string("op_4694")]; + string var_4694_to_fp16_dtype_0 = const()[name = string("op_4694_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4694_to_fp16 = cast(dtype = var_4694_to_fp16_dtype_0, x = var_4694)[name = string("cast_76")]; + tensor normed_output_369_cast_fp16 = mul(x = clip_286_cast_fp16, y = var_4694_to_fp16)[name = string("normed_output_369_cast_fp16")]; + tensor const_154_to_fp16 = const()[name = string("const_154_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126037056)))]; + tensor normed_output_371_cast_fp16 = mul(x = normed_output_369_cast_fp16, y = const_154_to_fp16)[name = string("normed_output_371_cast_fp16")]; + int32 var_4700 = const()[name = string("op_4700"), val = int32(-1)]; + fp32 var_4701 = const()[name = string("op_4701"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_10_q_proj_input_min_to_fp16 = const()[name = string("self_attns_10_q_proj_input_min_to_fp16"), val = fp16(-0x1.4ep+3)]; + fp16 self_attns_10_q_proj_input_max_to_fp16 = const()[name = string("self_attns_10_q_proj_input_max_to_fp16"), val = fp16(0x1.4ap+3)]; + tensor clip_287_cast_fp16 = clip(alpha = self_attns_10_q_proj_input_min_to_fp16, beta = self_attns_10_q_proj_input_max_to_fp16, x = normed_output_371_cast_fp16)[name = string("clip_287_cast_fp16")]; + tensor self_attns_10_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126039168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126563520))))[name = string("self_attns_10_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_113_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_10_q_proj_linear_weight_to_fp16_palettized, x = clip_287_cast_fp16)[name = string("linear_113_cast_fp16")]; + fp16 
self_attns_10_q_proj_output_min_to_fp16 = const()[name = string("self_attns_10_q_proj_output_min_to_fp16"), val = fp16(-0x1.36p+4)]; + fp16 self_attns_10_q_proj_output_max_to_fp16 = const()[name = string("self_attns_10_q_proj_output_max_to_fp16"), val = fp16(0x1.32p+4)]; + tensor clip_288_cast_fp16 = clip(alpha = self_attns_10_q_proj_output_min_to_fp16, beta = self_attns_10_q_proj_output_max_to_fp16, x = linear_113_cast_fp16)[name = string("clip_288_cast_fp16")]; + tensor var_4745 = const()[name = string("op_4745"), val = tensor([1, 50, 8, 128])]; + tensor q_21_cast_fp16 = reshape(shape = var_4745, x = clip_288_cast_fp16)[name = string("q_21_cast_fp16")]; + tensor self_attns_10_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126564608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127088960))))[name = string("self_attns_10_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_114_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_10_k_proj_linear_weight_to_fp16_palettized, x = clip_287_cast_fp16)[name = string("linear_114_cast_fp16")]; + fp16 self_attns_10_k_proj_output_min_to_fp16 = const()[name = string("self_attns_10_k_proj_output_min_to_fp16"), val = fp16(-0x1.36p+4)]; + fp16 self_attns_10_k_proj_output_max_to_fp16 = const()[name = string("self_attns_10_k_proj_output_max_to_fp16"), val = fp16(0x1.32p+4)]; + tensor clip_290_cast_fp16 = clip(alpha = self_attns_10_k_proj_output_min_to_fp16, beta = self_attns_10_k_proj_output_max_to_fp16, x = linear_114_cast_fp16)[name = string("clip_290_cast_fp16")]; + tensor var_4757 = const()[name = string("op_4757"), val = tensor([1, 50, 8, 128])]; + tensor k_21_cast_fp16 = reshape(shape = var_4757, x = clip_290_cast_fp16)[name = string("k_21_cast_fp16")]; + tensor self_attns_10_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127090048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127614400))))[name = string("self_attns_10_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_115_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_10_v_proj_linear_weight_to_fp16_palettized, x = clip_287_cast_fp16)[name = string("linear_115_cast_fp16")]; + fp16 self_attns_10_v_proj_output_min_to_fp16 = const()[name = string("self_attns_10_v_proj_output_min_to_fp16"), val = fp16(-0x1.36p+4)]; + fp16 self_attns_10_v_proj_output_max_to_fp16 = const()[name = string("self_attns_10_v_proj_output_max_to_fp16"), val = fp16(0x1.32p+4)]; + tensor clip_292_cast_fp16 = clip(alpha = self_attns_10_v_proj_output_min_to_fp16, beta = self_attns_10_v_proj_output_max_to_fp16, x = linear_115_cast_fp16)[name = string("clip_292_cast_fp16")]; + tensor var_4769 = const()[name = string("op_4769"), val = tensor([1, 50, 8, 128])]; + tensor input_453_cast_fp16 = reshape(shape = var_4769, x = clip_292_cast_fp16)[name = string("input_453_cast_fp16")]; + fp16 var_4771_to_fp16 = const()[name = string("op_4771_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_4772_cast_fp16 = mul(x = q_21_cast_fp16, y = var_4771_to_fp16)[name = string("op_4772_cast_fp16")]; + tensor var_4773_to_fp16 = const()[name = string("op_4773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127615488)))]; + tensor input_449_cast_fp16 = mul(x = var_4772_cast_fp16, y = var_4773_to_fp16)[name = string("input_449_cast_fp16")]; + fp16 var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_451_cast_fp16 = mul(x = k_21_cast_fp16, y = var_4775_to_fp16)[name = string("input_451_cast_fp16")]; + tensor q_padded_21_pad_0 = const()[name = string("q_padded_21_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_21_mode_0 = 
const()[name = string("q_padded_21_mode_0"), val = string("constant")]; + fp16 const_155_to_fp16 = const()[name = string("const_155_to_fp16"), val = fp16(0x0p+0)]; + tensor q_padded_21_cast_fp16 = pad(constant_val = const_155_to_fp16, mode = q_padded_21_mode_0, pad = q_padded_21_pad_0, x = input_449_cast_fp16)[name = string("q_padded_21_cast_fp16")]; + tensor var_4779 = const()[name = string("op_4779"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_21_cast_fp16 = reshape(shape = var_4779, x = q_padded_21_cast_fp16)[name = string("q_blocks_21_cast_fp16")]; + tensor k_padded_21_pad_0 = const()[name = string("k_padded_21_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_21_mode_0 = const()[name = string("k_padded_21_mode_0"), val = string("constant")]; + fp16 const_156_to_fp16 = const()[name = string("const_156_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_21_cast_fp16 = pad(constant_val = const_156_to_fp16, mode = k_padded_21_mode_0, pad = k_padded_21_pad_0, x = input_451_cast_fp16)[name = string("k_padded_21_cast_fp16")]; + tensor v_padded_21_pad_0 = const()[name = string("v_padded_21_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_21_mode_0 = const()[name = string("v_padded_21_mode_0"), val = string("constant")]; + fp16 const_157_to_fp16 = const()[name = string("const_157_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_21_cast_fp16 = pad(constant_val = const_157_to_fp16, mode = v_padded_21_mode_0, pad = v_padded_21_pad_0, x = input_453_cast_fp16)[name = string("v_padded_21_cast_fp16")]; + tensor var_4786_begin_0 = const()[name = string("op_4786_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4786_end_0 = const()[name = string("op_4786_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_4786_end_mask_0 = const()[name = string("op_4786_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4786_cast_fp16 = slice_by_index(begin = var_4786_begin_0, end = var_4786_end_0, end_mask = 
var_4786_end_mask_0, x = k_padded_21_cast_fp16)[name = string("op_4786_cast_fp16")]; + tensor var_4788_begin_0 = const()[name = string("op_4788_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_4788_end_0 = const()[name = string("op_4788_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_4788_end_mask_0 = const()[name = string("op_4788_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4788_cast_fp16 = slice_by_index(begin = var_4788_begin_0, end = var_4788_end_0, end_mask = var_4788_end_mask_0, x = k_padded_21_cast_fp16)[name = string("op_4788_cast_fp16")]; + tensor var_4790_begin_0 = const()[name = string("op_4790_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_4790_end_0 = const()[name = string("op_4790_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_4790_end_mask_0 = const()[name = string("op_4790_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4790_cast_fp16 = slice_by_index(begin = var_4790_begin_0, end = var_4790_end_0, end_mask = var_4790_end_mask_0, x = k_padded_21_cast_fp16)[name = string("op_4790_cast_fp16")]; + tensor var_4792_begin_0 = const()[name = string("op_4792_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_4792_end_0 = const()[name = string("op_4792_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_4792_end_mask_0 = const()[name = string("op_4792_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4792_cast_fp16 = slice_by_index(begin = var_4792_begin_0, end = var_4792_end_0, end_mask = var_4792_end_mask_0, x = k_padded_21_cast_fp16)[name = string("op_4792_cast_fp16")]; + tensor var_4794_begin_0 = const()[name = string("op_4794_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_4794_end_0 = const()[name = string("op_4794_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_4794_end_mask_0 = const()[name = string("op_4794_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4794_cast_fp16 = slice_by_index(begin = var_4794_begin_0, end = 
var_4794_end_0, end_mask = var_4794_end_mask_0, x = k_padded_21_cast_fp16)[name = string("op_4794_cast_fp16")]; + int32 k_blocks_21_axis_0 = const()[name = string("k_blocks_21_axis_0"), val = int32(1)]; + tensor k_blocks_21_cast_fp16 = stack(axis = k_blocks_21_axis_0, values = (var_4786_cast_fp16, var_4788_cast_fp16, var_4790_cast_fp16, var_4792_cast_fp16, var_4794_cast_fp16))[name = string("k_blocks_21_cast_fp16")]; + tensor var_4798_begin_0 = const()[name = string("op_4798_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4798_end_0 = const()[name = string("op_4798_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_4798_end_mask_0 = const()[name = string("op_4798_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4798_cast_fp16 = slice_by_index(begin = var_4798_begin_0, end = var_4798_end_0, end_mask = var_4798_end_mask_0, x = v_padded_21_cast_fp16)[name = string("op_4798_cast_fp16")]; + tensor var_4800_begin_0 = const()[name = string("op_4800_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_4800_end_0 = const()[name = string("op_4800_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_4800_end_mask_0 = const()[name = string("op_4800_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4800_cast_fp16 = slice_by_index(begin = var_4800_begin_0, end = var_4800_end_0, end_mask = var_4800_end_mask_0, x = v_padded_21_cast_fp16)[name = string("op_4800_cast_fp16")]; + tensor var_4802_begin_0 = const()[name = string("op_4802_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_4802_end_0 = const()[name = string("op_4802_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_4802_end_mask_0 = const()[name = string("op_4802_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4802_cast_fp16 = slice_by_index(begin = var_4802_begin_0, end = var_4802_end_0, end_mask = var_4802_end_mask_0, x = v_padded_21_cast_fp16)[name = string("op_4802_cast_fp16")]; + tensor var_4804_begin_0 = const()[name = 
string("op_4804_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_4804_end_0 = const()[name = string("op_4804_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_4804_end_mask_0 = const()[name = string("op_4804_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4804_cast_fp16 = slice_by_index(begin = var_4804_begin_0, end = var_4804_end_0, end_mask = var_4804_end_mask_0, x = v_padded_21_cast_fp16)[name = string("op_4804_cast_fp16")]; + tensor var_4806_begin_0 = const()[name = string("op_4806_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_4806_end_0 = const()[name = string("op_4806_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_4806_end_mask_0 = const()[name = string("op_4806_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4806_cast_fp16 = slice_by_index(begin = var_4806_begin_0, end = var_4806_end_0, end_mask = var_4806_end_mask_0, x = v_padded_21_cast_fp16)[name = string("op_4806_cast_fp16")]; + int32 v_blocks_21_axis_0 = const()[name = string("v_blocks_21_axis_0"), val = int32(1)]; + tensor v_blocks_21_cast_fp16 = stack(axis = v_blocks_21_axis_0, values = (var_4798_cast_fp16, var_4800_cast_fp16, var_4802_cast_fp16, var_4804_cast_fp16, var_4806_cast_fp16))[name = string("v_blocks_21_cast_fp16")]; + tensor var_4814 = const()[name = string("op_4814"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_4816 = const()[name = string("op_4816"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_21_transpose_x_0 = const()[name = string("matrix_ac_21_transpose_x_0"), val = bool(false)]; + bool matrix_ac_21_transpose_y_0 = const()[name = string("matrix_ac_21_transpose_y_0"), val = bool(false)]; + tensor queries_21_cast_fp16 = transpose(perm = var_4814, x = q_blocks_21_cast_fp16)[name = string("transpose_10")]; + tensor keys_t_21_cast_fp16 = transpose(perm = var_4816, x = k_blocks_21_cast_fp16)[name = string("transpose_11")]; + tensor matrix_ac_21_cast_fp16 = matmul(transpose_x = matrix_ac_21_transpose_x_0, transpose_y 
= matrix_ac_21_transpose_y_0, x = queries_21_cast_fp16, y = keys_t_21_cast_fp16)[name = string("matrix_ac_21_cast_fp16")]; + tensor var_4819 = const()[name = string("op_4819"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_21_cast_fp16 = reshape(shape = var_4819, x = queries_21_cast_fp16)[name = string("q_flat_21_cast_fp16")]; + bool matrix_bd_101_transpose_x_0 = const()[name = string("matrix_bd_101_transpose_x_0"), val = bool(false)]; + bool matrix_bd_101_transpose_y_0 = const()[name = string("matrix_bd_101_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_21_to_fp16 = const()[name = string("rel_k_t_21_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127615808)))]; + tensor matrix_bd_101_cast_fp16 = matmul(transpose_x = matrix_bd_101_transpose_x_0, transpose_y = matrix_bd_101_transpose_y_0, x = q_flat_21_cast_fp16, y = rel_k_t_21_to_fp16)[name = string("matrix_bd_101_cast_fp16")]; + tensor var_4824 = const()[name = string("op_4824"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_455_cast_fp16 = reshape(shape = var_4824, x = matrix_bd_101_cast_fp16)[name = string("input_455_cast_fp16")]; + tensor matrix_bd_103_pad_0 = const()[name = string("matrix_bd_103_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127642496)))]; + string matrix_bd_103_mode_0 = const()[name = string("matrix_bd_103_mode_0"), val = string("constant")]; + fp16 const_159_to_fp16 = const()[name = string("const_159_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_103_cast_fp16 = pad(constant_val = const_159_to_fp16, mode = matrix_bd_103_mode_0, pad = matrix_bd_103_pad_0, x = input_455_cast_fp16)[name = string("matrix_bd_103_cast_fp16")]; + tensor var_4828 = const()[name = string("op_4828"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_105_cast_fp16 = reshape(shape = var_4828, x = matrix_bd_103_cast_fp16)[name = string("matrix_bd_105_cast_fp16")]; + tensor matrix_bd_107_begin_0 = 
const()[name = string("matrix_bd_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_107_end_0 = const()[name = string("matrix_bd_107_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_107_end_mask_0 = const()[name = string("matrix_bd_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_107_cast_fp16 = slice_by_index(begin = matrix_bd_107_begin_0, end = matrix_bd_107_end_0, end_mask = matrix_bd_107_end_mask_0, x = matrix_bd_105_cast_fp16)[name = string("matrix_bd_107_cast_fp16")]; + tensor var_4834 = const()[name = string("op_4834"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_109_cast_fp16 = reshape(shape = var_4834, x = matrix_bd_107_cast_fp16)[name = string("matrix_bd_109_cast_fp16")]; + tensor attn_61_cast_fp16 = add(x = matrix_ac_21_cast_fp16, y = matrix_bd_109_cast_fp16)[name = string("attn_61_cast_fp16")]; + fp16 _inversed_4837_y_0_to_fp16 = const()[name = string("_inversed_4837_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_4837_cast_fp16 = mul(x = attn_61_cast_fp16, y = _inversed_4837_y_0_to_fp16)[name = string("_inversed_4837_cast_fp16")]; + string _inversed_4837_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_4837_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_4837_cast_fp16_to_fp32 = cast(dtype = _inversed_4837_cast_fp16_to_fp32_dtype_0, x = _inversed_4837_cast_fp16)[name = string("cast_75")]; + tensor var_4838 = tanh(x = _inversed_4837_cast_fp16_to_fp32)[name = string("op_4838")]; + string var_4838_to_fp16_dtype_0 = const()[name = string("op_4838_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_10_softcap_to_fp16 = const()[name = string("self_attns_10_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_4838_to_fp16 = cast(dtype = var_4838_to_fp16_dtype_0, x = var_4838)[name = string("cast_74")]; + tensor attn_63_cast_fp16 = mul(x = var_4838_to_fp16, y = self_attns_10_softcap_to_fp16)[name = string("attn_63_cast_fp16")]; + string 
attn_63_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_63_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_63_cast_fp16_to_fp32 = cast(dtype = attn_63_cast_fp16_to_fp32_dtype_0, x = attn_63_cast_fp16)[name = string("cast_73")]; + tensor input_457 = select(a = var_4701, b = attn_63_cast_fp16_to_fp32, cond = var_460)[name = string("input_457")]; + tensor var_4842 = softmax(axis = var_4700, x = input_457)[name = string("op_4842")]; + tensor var_4844 = const()[name = string("op_4844"), val = tensor([0, 3, 1, -3, -1])]; + bool out_61_transpose_x_0 = const()[name = string("out_61_transpose_x_0"), val = bool(false)]; + bool out_61_transpose_y_0 = const()[name = string("out_61_transpose_y_0"), val = bool(false)]; + string var_4842_to_fp16_dtype_0 = const()[name = string("op_4842_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_21_cast_fp16 = transpose(perm = var_4844, x = v_blocks_21_cast_fp16)[name = string("transpose_9")]; + tensor var_4842_to_fp16 = cast(dtype = var_4842_to_fp16_dtype_0, x = var_4842)[name = string("cast_72")]; + tensor out_61_cast_fp16 = matmul(transpose_x = out_61_transpose_x_0, transpose_y = out_61_transpose_y_0, x = var_4842_to_fp16, y = values_t_21_cast_fp16)[name = string("out_61_cast_fp16")]; + tensor var_4847 = const()[name = string("op_4847"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_4849 = const()[name = string("op_4849"), val = tensor([1, 60, 1024])]; + tensor var_4848_cast_fp16 = transpose(perm = var_4847, x = out_61_cast_fp16)[name = string("transpose_8")]; + tensor out_63_cast_fp16 = reshape(shape = var_4849, x = var_4848_cast_fp16)[name = string("out_63_cast_fp16")]; + tensor var_4852_begin_0 = const()[name = string("op_4852_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4852_end_0 = const()[name = string("op_4852_end_0"), val = tensor([1, 50, 1024])]; + tensor var_4852_end_mask_0 = const()[name = string("op_4852_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4852_cast_fp16 = 
slice_by_index(begin = var_4852_begin_0, end = var_4852_end_0, end_mask = var_4852_end_mask_0, x = out_63_cast_fp16)[name = string("op_4852_cast_fp16")]; + fp16 self_attns_10_post_input_min_to_fp16 = const()[name = string("self_attns_10_post_input_min_to_fp16"), val = fp16(-0x1.3p+4)]; + fp16 self_attns_10_post_input_max_to_fp16 = const()[name = string("self_attns_10_post_input_max_to_fp16"), val = fp16(0x1.2cp+4)]; + tensor clip_293_cast_fp16 = clip(alpha = self_attns_10_post_input_min_to_fp16, beta = self_attns_10_post_input_max_to_fp16, x = var_4852_cast_fp16)[name = string("clip_293_cast_fp16")]; + tensor self_attns_10_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127642624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128166976))))[name = string("self_attns_10_post_linear_weight_to_fp16_palettized")]; + tensor linear_117_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_10_post_linear_weight_to_fp16_palettized, x = clip_293_cast_fp16)[name = string("linear_117_cast_fp16")]; + fp16 self_attns_10_post_output_min_to_fp16 = const()[name = string("self_attns_10_post_output_min_to_fp16"), val = fp16(-0x1.9p+6)]; + fp16 self_attns_10_post_output_max_to_fp16 = const()[name = string("self_attns_10_post_output_max_to_fp16"), val = fp16(0x1.8cp+6)]; + tensor clip_294_cast_fp16 = clip(alpha = self_attns_10_post_output_min_to_fp16, beta = self_attns_10_post_output_max_to_fp16, x = linear_117_cast_fp16)[name = string("clip_294_cast_fp16")]; + fp16 var_4864_to_fp16 = const()[name = string("op_4864_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_4865_to_fp16 = const()[name = string("op_4865_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_295_cast_fp16 = clip(alpha = var_4864_to_fp16, beta = var_4865_to_fp16, x = clip_294_cast_fp16)[name = string("clip_295_cast_fp16")]; + string 
clip_295_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_295_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4867 = const()[name = string("op_4867"), val = fp32(-0x1p-1)]; + fp32 var_4871_promoted = const()[name = string("op_4871_promoted"), val = fp32(0x1p+1)]; + tensor clip_295_cast_fp16_to_fp32 = cast(dtype = clip_295_cast_fp16_to_fp32_dtype_0, x = clip_295_cast_fp16)[name = string("cast_71")]; + tensor var_4877 = pow(x = clip_295_cast_fp16_to_fp32, y = var_4871_promoted)[name = string("op_4877")]; + tensor var_4879_axes_0 = const()[name = string("op_4879_axes_0"), val = tensor([-1])]; + bool var_4879_keep_dims_0 = const()[name = string("op_4879_keep_dims_0"), val = bool(true)]; + tensor var_4879 = reduce_mean(axes = var_4879_axes_0, keep_dims = var_4879_keep_dims_0, x = var_4877)[name = string("op_4879")]; + string var_4879_to_fp16_dtype_0 = const()[name = string("op_4879_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4880_to_fp16 = const()[name = string("op_4880_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4879_to_fp16 = cast(dtype = var_4879_to_fp16_dtype_0, x = var_4879)[name = string("cast_70")]; + tensor mean_squared_187_cast_fp16 = add(x = var_4879_to_fp16, y = var_4880_to_fp16)[name = string("mean_squared_187_cast_fp16")]; + string mean_squared_187_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_187_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_187_cast_fp16_to_fp32 = cast(dtype = mean_squared_187_cast_fp16_to_fp32_dtype_0, x = mean_squared_187_cast_fp16)[name = string("cast_69")]; + tensor var_4882 = pow(x = mean_squared_187_cast_fp16_to_fp32, y = var_4867)[name = string("op_4882")]; + string var_4882_to_fp16_dtype_0 = const()[name = string("op_4882_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4882_to_fp16 = cast(dtype = var_4882_to_fp16_dtype_0, x = var_4882)[name = string("cast_68")]; + tensor normed_output_373_cast_fp16 = mul(x = clip_295_cast_fp16, y = 
var_4882_to_fp16)[name = string("normed_output_373_cast_fp16")]; + tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128168064)))]; + tensor normed_output_375_cast_fp16 = mul(x = normed_output_373_cast_fp16, y = const_160_to_fp16)[name = string("normed_output_375_cast_fp16")]; + tensor hidden_states_1081_cast_fp16 = add(x = normed_output_375_cast_fp16, y = hidden_states_1055_cast_fp16)[name = string("hidden_states_1081_cast_fp16")]; + string hidden_states_1081_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1081_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4889 = const()[name = string("op_4889"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4890 = const()[name = string("op_4890"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_4902 = const()[name = string("op_4902"), val = fp32(-0x1p-1)]; + fp32 var_4898_promoted = const()[name = string("op_4898_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_1081_cast_fp16_to_fp32 = cast(dtype = hidden_states_1081_cast_fp16_to_fp32_dtype_0, x = hidden_states_1081_cast_fp16)[name = string("cast_67")]; + tensor var_4910 = pow(x = hidden_states_1081_cast_fp16_to_fp32, y = var_4898_promoted)[name = string("op_4910")]; + tensor var_4912_axes_0 = const()[name = string("op_4912_axes_0"), val = tensor([-1])]; + bool var_4912_keep_dims_0 = const()[name = string("op_4912_keep_dims_0"), val = bool(true)]; + tensor var_4912 = reduce_mean(axes = var_4912_axes_0, keep_dims = var_4912_keep_dims_0, x = var_4910)[name = string("op_4912")]; + string var_4912_to_fp16_dtype_0 = const()[name = string("op_4912_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4913_to_fp16 = const()[name = string("op_4913_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4912_to_fp16 = cast(dtype = var_4912_to_fp16_dtype_0, x = var_4912)[name = string("cast_66")]; + tensor mean_squared_189_cast_fp16 = add(x = var_4912_to_fp16, y = 
var_4913_to_fp16)[name = string("mean_squared_189_cast_fp16")]; + string mean_squared_189_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_189_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_189_cast_fp16_to_fp32 = cast(dtype = mean_squared_189_cast_fp16_to_fp32_dtype_0, x = mean_squared_189_cast_fp16)[name = string("cast_65")]; + tensor var_4915 = pow(x = mean_squared_189_cast_fp16_to_fp32, y = var_4902)[name = string("op_4915")]; + string var_4915_to_fp16_dtype_0 = const()[name = string("op_4915_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_4915_to_fp16 = cast(dtype = var_4915_to_fp16_dtype_0, x = var_4915)[name = string("cast_64")]; + tensor normed_output_377_cast_fp16 = mul(x = hidden_states_1081_cast_fp16, y = var_4915_to_fp16)[name = string("normed_output_377_cast_fp16")]; + tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128170176)))]; + tensor normed_output_379_cast_fp16 = mul(x = normed_output_377_cast_fp16, y = const_161_to_fp16)[name = string("normed_output_379_cast_fp16")]; + fp16 lconv1ds_10_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_10_linear_start_input_min_to_fp16"), val = fp16(-0x1.4ap+3)]; + fp16 lconv1ds_10_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_10_linear_start_input_max_to_fp16"), val = fp16(0x1.48p+3)]; + tensor clip_296_cast_fp16 = clip(alpha = lconv1ds_10_linear_start_input_min_to_fp16, beta = lconv1ds_10_linear_start_input_max_to_fp16, x = normed_output_379_cast_fp16)[name = string("clip_296_cast_fp16")]; + tensor lconv1ds_10_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128172288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129220928))))[name = 
string("lconv1ds_10_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_118_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_10_linear_start_linear_weight_to_fp16_palettized, x = clip_296_cast_fp16)[name = string("linear_118_cast_fp16")]; + fp16 lconv1ds_10_linear_start_output_min_to_fp16 = const()[name = string("lconv1ds_10_linear_start_output_min_to_fp16"), val = fp16(-0x1.bap+4)]; + fp16 lconv1ds_10_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_10_linear_start_output_max_to_fp16"), val = fp16(0x1.b6p+4)]; + tensor clip_297_cast_fp16 = clip(alpha = lconv1ds_10_linear_start_output_min_to_fp16, beta = lconv1ds_10_linear_start_output_max_to_fp16, x = linear_118_cast_fp16)[name = string("clip_297_cast_fp16")]; + int32 hidden_states_1089_split_num_splits_0 = const()[name = string("hidden_states_1089_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_1089_split_axis_0 = const()[name = string("hidden_states_1089_split_axis_0"), val = int32(-1)]; + tensor hidden_states_1089_split_cast_fp16_0, tensor hidden_states_1089_split_cast_fp16_1 = split(axis = hidden_states_1089_split_axis_0, num_splits = hidden_states_1089_split_num_splits_0, x = clip_297_cast_fp16)[name = string("hidden_states_1089_split_cast_fp16")]; + tensor hidden_states_1089_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_1089_split_cast_fp16_1)[name = string("hidden_states_1089_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_1089_cast_fp16 = mul(x = hidden_states_1089_split_cast_fp16_0, y = hidden_states_1089_split_1_sigmoid_cast_fp16)[name = string("hidden_states_1089_cast_fp16")]; + tensor input_465_perm_0 = const()[name = string("input_465_perm_0"), val = tensor([0, 2, 1])]; + tensor input_467_pad_0 = const()[name = string("input_467_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_467_mode_0 = const()[name = string("input_467_mode_0"), val = string("constant")]; + fp16 const_162_to_fp16 = const()[name = 
string("const_162_to_fp16"), val = fp16(0x0p+0)]; + tensor input_465_cast_fp16 = transpose(perm = input_465_perm_0, x = hidden_states_1089_cast_fp16)[name = string("transpose_7")]; + tensor input_467_cast_fp16 = pad(constant_val = const_162_to_fp16, mode = input_467_mode_0, pad = input_467_pad_0, x = input_465_cast_fp16)[name = string("input_467_cast_fp16")]; + string var_4941_pad_type_0 = const()[name = string("op_4941_pad_type_0"), val = string("valid")]; + int32 var_4941_groups_0 = const()[name = string("op_4941_groups_0"), val = int32(1024)]; + tensor var_4941_strides_0 = const()[name = string("op_4941_strides_0"), val = tensor([1])]; + tensor var_4941_pad_0 = const()[name = string("op_4941_pad_0"), val = tensor([0, 0])]; + tensor var_4941_dilations_0 = const()[name = string("op_4941_dilations_0"), val = tensor([1])]; + tensor lconv1ds_10_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129223040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129225664))))[name = string("lconv1ds_10_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_4941_cast_fp16 = conv(dilations = var_4941_dilations_0, groups = var_4941_groups_0, pad = var_4941_pad_0, pad_type = var_4941_pad_type_0, strides = var_4941_strides_0, weight = lconv1ds_10_depthwise_conv1d_weight_to_fp16_palettized, x = input_467_cast_fp16)[name = string("op_4941_cast_fp16")]; + tensor hidden_states_1091_perm_0 = const()[name = string("hidden_states_1091_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_1091_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1091_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_1091_cast_fp16 = transpose(perm = hidden_states_1091_perm_0, x = var_4941_cast_fp16)[name = string("transpose_6")]; + tensor hidden_states_1091_cast_fp16_to_fp32 = cast(dtype = 
hidden_states_1091_cast_fp16_to_fp32_dtype_0, x = hidden_states_1091_cast_fp16)[name = string("cast_63")]; + tensor clip_298 = clip(alpha = var_4890, beta = var_4889, x = hidden_states_1091_cast_fp16_to_fp32)[name = string("clip_298")]; + fp32 var_4898_promoted_1 = const()[name = string("op_4898_promoted_1"), val = fp32(0x1p+1)]; + tensor var_4946 = pow(x = clip_298, y = var_4898_promoted_1)[name = string("op_4946")]; + tensor var_4948_axes_0 = const()[name = string("op_4948_axes_0"), val = tensor([-1])]; + bool var_4948_keep_dims_0 = const()[name = string("op_4948_keep_dims_0"), val = bool(true)]; + tensor var_4948 = reduce_mean(axes = var_4948_axes_0, keep_dims = var_4948_keep_dims_0, x = var_4946)[name = string("op_4948")]; + string var_4948_to_fp16_dtype_0 = const()[name = string("op_4948_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4949_to_fp16 = const()[name = string("op_4949_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4948_to_fp16 = cast(dtype = var_4948_to_fp16_dtype_0, x = var_4948)[name = string("cast_62")]; + tensor mean_squared_191_cast_fp16 = add(x = var_4948_to_fp16, y = var_4949_to_fp16)[name = string("mean_squared_191_cast_fp16")]; + string mean_squared_191_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_191_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_191_cast_fp16_to_fp32 = cast(dtype = mean_squared_191_cast_fp16_to_fp32_dtype_0, x = mean_squared_191_cast_fp16)[name = string("cast_61")]; + tensor var_4951 = pow(x = mean_squared_191_cast_fp16_to_fp32, y = var_4902)[name = string("op_4951")]; + string clip_298_to_fp16_dtype_0 = const()[name = string("clip_298_to_fp16_dtype_0"), val = string("fp16")]; + string var_4951_to_fp16_dtype_0 = const()[name = string("op_4951_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_298_to_fp16 = cast(dtype = clip_298_to_fp16_dtype_0, x = clip_298)[name = string("cast_59")]; + tensor var_4951_to_fp16 = cast(dtype = var_4951_to_fp16_dtype_0, x = 
var_4951)[name = string("cast_60")]; + tensor normed_output_381_cast_fp16 = mul(x = clip_298_to_fp16, y = var_4951_to_fp16)[name = string("normed_output_381_cast_fp16")]; + tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129226752)))]; + tensor normed_output_383_cast_fp16 = mul(x = normed_output_381_cast_fp16, y = const_163_to_fp16)[name = string("normed_output_383_cast_fp16")]; + tensor hidden_states_1097_cast_fp16 = silu(x = normed_output_383_cast_fp16)[name = string("hidden_states_1097_cast_fp16")]; + fp16 lconv1ds_10_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_10_linear_end_input_min_to_fp16"), val = fp16(-0x1.f4p+3)]; + fp16 lconv1ds_10_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_10_linear_end_input_max_to_fp16"), val = fp16(0x1.fp+3)]; + tensor clip_299_cast_fp16 = clip(alpha = lconv1ds_10_linear_end_input_min_to_fp16, beta = lconv1ds_10_linear_end_input_max_to_fp16, x = hidden_states_1097_cast_fp16)[name = string("clip_299_cast_fp16")]; + tensor lconv1ds_10_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129228864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129753216))))[name = string("lconv1ds_10_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_10_linear_end_linear_weight_to_fp16_palettized, x = clip_299_cast_fp16)[name = string("linear_119_cast_fp16")]; + fp16 lconv1ds_10_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_10_linear_end_output_min_to_fp16"), val = fp16(-0x1.6p+3)]; + fp16 lconv1ds_10_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_10_linear_end_output_max_to_fp16"), val = fp16(0x1.5ep+3)]; + tensor clip_300_cast_fp16 = 
clip(alpha = lconv1ds_10_linear_end_output_min_to_fp16, beta = lconv1ds_10_linear_end_output_max_to_fp16, x = linear_119_cast_fp16)[name = string("clip_300_cast_fp16")]; + tensor hidden_states_1103_cast_fp16 = add(x = clip_300_cast_fp16, y = hidden_states_1081_cast_fp16)[name = string("hidden_states_1103_cast_fp16")]; + string hidden_states_1103_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1103_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_4975 = const()[name = string("op_4975"), val = fp32(-0x1p-1)]; + fp32 var_4976 = const()[name = string("op_4976"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_4977 = const()[name = string("op_4977"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_1103_cast_fp16_to_fp32 = cast(dtype = hidden_states_1103_cast_fp16_to_fp32_dtype_0, x = hidden_states_1103_cast_fp16)[name = string("cast_58")]; + tensor clip_301 = clip(alpha = var_4977, beta = var_4976, x = hidden_states_1103_cast_fp16_to_fp32)[name = string("clip_301")]; + fp32 var_4971_promoted = const()[name = string("op_4971_promoted"), val = fp32(0x1p+1)]; + tensor var_4985 = pow(x = clip_301, y = var_4971_promoted)[name = string("op_4985")]; + tensor var_4987_axes_0 = const()[name = string("op_4987_axes_0"), val = tensor([-1])]; + bool var_4987_keep_dims_0 = const()[name = string("op_4987_keep_dims_0"), val = bool(true)]; + tensor var_4987 = reduce_mean(axes = var_4987_axes_0, keep_dims = var_4987_keep_dims_0, x = var_4985)[name = string("op_4987")]; + string var_4987_to_fp16_dtype_0 = const()[name = string("op_4987_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_4987_to_fp16 = cast(dtype = var_4987_to_fp16_dtype_0, x = var_4987)[name = string("cast_57")]; + tensor mean_squared_193_cast_fp16 = add(x = var_4987_to_fp16, y = var_4988_to_fp16)[name = string("mean_squared_193_cast_fp16")]; + string mean_squared_193_cast_fp16_to_fp32_dtype_0 = 
const()[name = string("mean_squared_193_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_193_cast_fp16_to_fp32 = cast(dtype = mean_squared_193_cast_fp16_to_fp32_dtype_0, x = mean_squared_193_cast_fp16)[name = string("cast_56")]; + tensor var_4990 = pow(x = mean_squared_193_cast_fp16_to_fp32, y = var_4975)[name = string("op_4990")]; + string clip_301_to_fp16_dtype_0 = const()[name = string("clip_301_to_fp16_dtype_0"), val = string("fp16")]; + string var_4990_to_fp16_dtype_0 = const()[name = string("op_4990_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_301_to_fp16 = cast(dtype = clip_301_to_fp16_dtype_0, x = clip_301)[name = string("cast_54")]; + tensor var_4990_to_fp16 = cast(dtype = var_4990_to_fp16_dtype_0, x = var_4990)[name = string("cast_55")]; + tensor normed_output_385_cast_fp16 = mul(x = clip_301_to_fp16, y = var_4990_to_fp16)[name = string("normed_output_385_cast_fp16")]; + tensor const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129754304)))]; + tensor normed_output_387_cast_fp16 = mul(x = normed_output_385_cast_fp16, y = const_164_to_fp16)[name = string("normed_output_387_cast_fp16")]; + fp16 feed_forward2s_10_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.8ap+4)]; + fp16 feed_forward2s_10_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.88p+4)]; + tensor clip_302_cast_fp16 = clip(alpha = feed_forward2s_10_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_10_ffw_layer_1_input_max_to_fp16, x = normed_output_387_cast_fp16)[name = string("clip_302_cast_fp16")]; + tensor feed_forward2s_10_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129756416))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131853632))))[name = string("feed_forward2s_10_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_10_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_302_cast_fp16)[name = string("linear_120_cast_fp16")]; + fp16 feed_forward2s_10_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.12p+6)]; + fp16 feed_forward2s_10_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.1p+6)]; + tensor clip_303_cast_fp16 = clip(alpha = feed_forward2s_10_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_10_ffw_layer_1_output_max_to_fp16, x = linear_120_cast_fp16)[name = string("clip_303_cast_fp16")]; + tensor hidden_states_1113_cast_fp16 = silu(x = clip_303_cast_fp16)[name = string("hidden_states_1113_cast_fp16")]; + fp16 feed_forward2s_10_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.76p+4)]; + fp16 feed_forward2s_10_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.72p+4)]; + tensor clip_304_cast_fp16 = clip(alpha = feed_forward2s_10_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_10_ffw_layer_2_input_max_to_fp16, x = hidden_states_1113_cast_fp16)[name = string("clip_304_cast_fp16")]; + tensor feed_forward2s_10_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131857792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133955008))))[name = string("feed_forward2s_10_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_121_cast_fp16 = linear(bias = 
linear_0_bias_0_to_fp16, weight = feed_forward2s_10_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_304_cast_fp16)[name = string("linear_121_cast_fp16")]; + fp16 feed_forward2s_10_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.a8p+7)]; + fp16 feed_forward2s_10_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_10_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.a6p+7)]; + tensor clip_305_cast_fp16 = clip(alpha = feed_forward2s_10_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_10_ffw_layer_2_output_max_to_fp16, x = linear_121_cast_fp16)[name = string("clip_305_cast_fp16")]; + string clip_305_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_305_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_305_cast_fp16_to_fp32 = cast(dtype = clip_305_cast_fp16_to_fp32_dtype_0, x = clip_305_cast_fp16)[name = string("cast_53")]; + tensor clip_306 = clip(alpha = var_4977, beta = var_4976, x = clip_305_cast_fp16_to_fp32)[name = string("clip_306")]; + fp32 var_4971_promoted_1 = const()[name = string("op_4971_promoted_1"), val = fp32(0x1p+1)]; + tensor var_5017 = pow(x = clip_306, y = var_4971_promoted_1)[name = string("op_5017")]; + tensor var_5019_axes_0 = const()[name = string("op_5019_axes_0"), val = tensor([-1])]; + bool var_5019_keep_dims_0 = const()[name = string("op_5019_keep_dims_0"), val = bool(true)]; + tensor var_5019 = reduce_mean(axes = var_5019_axes_0, keep_dims = var_5019_keep_dims_0, x = var_5017)[name = string("op_5019")]; + string var_5019_to_fp16_dtype_0 = const()[name = string("op_5019_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5020_to_fp16 = const()[name = string("op_5020_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5019_to_fp16 = cast(dtype = var_5019_to_fp16_dtype_0, x = var_5019)[name = string("cast_52")]; + tensor mean_squared_195_cast_fp16 = add(x = var_5019_to_fp16, y = var_5020_to_fp16)[name = 
string("mean_squared_195_cast_fp16")]; + string mean_squared_195_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_195_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_195_cast_fp16_to_fp32 = cast(dtype = mean_squared_195_cast_fp16_to_fp32_dtype_0, x = mean_squared_195_cast_fp16)[name = string("cast_51")]; + tensor var_5022 = pow(x = mean_squared_195_cast_fp16_to_fp32, y = var_4975)[name = string("op_5022")]; + string clip_306_to_fp16_dtype_0 = const()[name = string("clip_306_to_fp16_dtype_0"), val = string("fp16")]; + string var_5022_to_fp16_dtype_0 = const()[name = string("op_5022_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_306_to_fp16 = cast(dtype = clip_306_to_fp16_dtype_0, x = clip_306)[name = string("cast_49")]; + tensor var_5022_to_fp16 = cast(dtype = var_5022_to_fp16_dtype_0, x = var_5022)[name = string("cast_50")]; + tensor normed_output_389_cast_fp16 = mul(x = clip_306_to_fp16, y = var_5022_to_fp16)[name = string("normed_output_389_cast_fp16")]; + tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133956096)))]; + tensor normed_output_391_cast_fp16 = mul(x = normed_output_389_cast_fp16, y = const_165_to_fp16)[name = string("normed_output_391_cast_fp16")]; + fp16 var_4967_to_fp16 = const()[name = string("op_4967_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_1125_cast_fp16 = mul(x = normed_output_391_cast_fp16, y = var_4967_to_fp16)[name = string("hidden_states_1125_cast_fp16")]; + tensor hidden_states_1127_cast_fp16 = add(x = hidden_states_1125_cast_fp16, y = hidden_states_1103_cast_fp16)[name = string("hidden_states_1127_cast_fp16")]; + fp16 var_5029_to_fp16 = const()[name = string("op_5029_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_5030_to_fp16 = const()[name = string("op_5030_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_307_cast_fp16 = clip(alpha = var_5029_to_fp16, beta = 
var_5030_to_fp16, x = hidden_states_1127_cast_fp16)[name = string("clip_307_cast_fp16")]; + string clip_307_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_307_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5032 = const()[name = string("op_5032"), val = fp32(-0x1p-1)]; + fp32 var_5036_promoted = const()[name = string("op_5036_promoted"), val = fp32(0x1p+1)]; + tensor clip_307_cast_fp16_to_fp32 = cast(dtype = clip_307_cast_fp16_to_fp32_dtype_0, x = clip_307_cast_fp16)[name = string("cast_48")]; + tensor var_5042 = pow(x = clip_307_cast_fp16_to_fp32, y = var_5036_promoted)[name = string("op_5042")]; + tensor var_5044_axes_0 = const()[name = string("op_5044_axes_0"), val = tensor([-1])]; + bool var_5044_keep_dims_0 = const()[name = string("op_5044_keep_dims_0"), val = bool(true)]; + tensor var_5044 = reduce_mean(axes = var_5044_axes_0, keep_dims = var_5044_keep_dims_0, x = var_5042)[name = string("op_5044")]; + string var_5044_to_fp16_dtype_0 = const()[name = string("op_5044_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5045_to_fp16 = const()[name = string("op_5045_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5044_to_fp16 = cast(dtype = var_5044_to_fp16_dtype_0, x = var_5044)[name = string("cast_47")]; + tensor mean_squared_197_cast_fp16 = add(x = var_5044_to_fp16, y = var_5045_to_fp16)[name = string("mean_squared_197_cast_fp16")]; + string mean_squared_197_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_197_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_197_cast_fp16_to_fp32 = cast(dtype = mean_squared_197_cast_fp16_to_fp32_dtype_0, x = mean_squared_197_cast_fp16)[name = string("cast_46")]; + tensor var_5047 = pow(x = mean_squared_197_cast_fp16_to_fp32, y = var_5032)[name = string("op_5047")]; + string var_5047_to_fp16_dtype_0 = const()[name = string("op_5047_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_5047_to_fp16 = cast(dtype = var_5047_to_fp16_dtype_0, x = var_5047)[name = 
string("cast_45")]; + tensor normed_output_393_cast_fp16 = mul(x = clip_307_cast_fp16, y = var_5047_to_fp16)[name = string("normed_output_393_cast_fp16")]; + tensor const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133958208)))]; + tensor normed_output_395_cast_fp16 = mul(x = normed_output_393_cast_fp16, y = const_166_to_fp16)[name = string("normed_output_395_cast_fp16")]; + string normed_output_395_cast_fp16_to_fp32_dtype_0 = const()[name = string("normed_output_395_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5060 = const()[name = string("op_5060"), val = fp32(-0x1p-1)]; + fp32 var_5061 = const()[name = string("op_5061"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_5062 = const()[name = string("op_5062"), val = fp32(-0x1.2a05f2p+33)]; + tensor normed_output_395_cast_fp16_to_fp32 = cast(dtype = normed_output_395_cast_fp16_to_fp32_dtype_0, x = normed_output_395_cast_fp16)[name = string("cast_44")]; + tensor clip_308 = clip(alpha = var_5062, beta = var_5061, x = normed_output_395_cast_fp16_to_fp32)[name = string("clip_308")]; + fp32 var_5056_promoted = const()[name = string("op_5056_promoted"), val = fp32(0x1p+1)]; + tensor var_5070 = pow(x = clip_308, y = var_5056_promoted)[name = string("op_5070")]; + tensor var_5072_axes_0 = const()[name = string("op_5072_axes_0"), val = tensor([-1])]; + bool var_5072_keep_dims_0 = const()[name = string("op_5072_keep_dims_0"), val = bool(true)]; + tensor var_5072 = reduce_mean(axes = var_5072_axes_0, keep_dims = var_5072_keep_dims_0, x = var_5070)[name = string("op_5072")]; + string var_5072_to_fp16_dtype_0 = const()[name = string("op_5072_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5073_to_fp16 = const()[name = string("op_5073_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5072_to_fp16 = cast(dtype = var_5072_to_fp16_dtype_0, x = var_5072)[name = string("cast_43")]; + tensor mean_squared_199_cast_fp16 = 
add(x = var_5072_to_fp16, y = var_5073_to_fp16)[name = string("mean_squared_199_cast_fp16")]; + string mean_squared_199_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_199_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_199_cast_fp16_to_fp32 = cast(dtype = mean_squared_199_cast_fp16_to_fp32_dtype_0, x = mean_squared_199_cast_fp16)[name = string("cast_42")]; + tensor var_5075 = pow(x = mean_squared_199_cast_fp16_to_fp32, y = var_5060)[name = string("op_5075")]; + string clip_308_to_fp16_dtype_0 = const()[name = string("clip_308_to_fp16_dtype_0"), val = string("fp16")]; + string var_5075_to_fp16_dtype_0 = const()[name = string("op_5075_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_308_to_fp16 = cast(dtype = clip_308_to_fp16_dtype_0, x = clip_308)[name = string("cast_40")]; + tensor var_5075_to_fp16 = cast(dtype = var_5075_to_fp16_dtype_0, x = var_5075)[name = string("cast_41")]; + tensor normed_output_397_cast_fp16 = mul(x = clip_308_to_fp16, y = var_5075_to_fp16)[name = string("normed_output_397_cast_fp16")]; + tensor const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133960320)))]; + tensor normed_output_399_cast_fp16 = mul(x = normed_output_397_cast_fp16, y = const_167_to_fp16)[name = string("normed_output_399_cast_fp16")]; + fp16 feed_forward1s_11_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.28p+3)]; + fp16 feed_forward1s_11_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.26p+3)]; + tensor clip_309_cast_fp16 = clip(alpha = feed_forward1s_11_ffw_layer_1_input_min_to_fp16, beta = feed_forward1s_11_ffw_layer_1_input_max_to_fp16, x = normed_output_399_cast_fp16)[name = string("clip_309_cast_fp16")]; + tensor feed_forward1s_11_ffw_layer_1_linear_weight_to_fp16_palettized 
= constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133962432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136059648))))[name = string("feed_forward1s_11_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_122_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward1s_11_ffw_layer_1_linear_weight_to_fp16_palettized, x = clip_309_cast_fp16)[name = string("linear_122_cast_fp16")]; + fp16 feed_forward1s_11_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.5ep+4)]; + fp16 feed_forward1s_11_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.5ap+4)]; + tensor clip_310_cast_fp16 = clip(alpha = feed_forward1s_11_ffw_layer_1_output_min_to_fp16, beta = feed_forward1s_11_ffw_layer_1_output_max_to_fp16, x = linear_122_cast_fp16)[name = string("clip_310_cast_fp16")]; + tensor hidden_states_1143_cast_fp16 = silu(x = clip_310_cast_fp16)[name = string("hidden_states_1143_cast_fp16")]; + fp16 feed_forward1s_11_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.3cp+3)]; + fp16 feed_forward1s_11_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.3ap+3)]; + tensor clip_311_cast_fp16 = clip(alpha = feed_forward1s_11_ffw_layer_2_input_min_to_fp16, beta = feed_forward1s_11_ffw_layer_2_input_max_to_fp16, x = hidden_states_1143_cast_fp16)[name = string("clip_311_cast_fp16")]; + tensor feed_forward1s_11_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136063808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(138161024))))[name = string("feed_forward1s_11_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_123_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward1s_11_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_311_cast_fp16)[name = string("linear_123_cast_fp16")]; + fp16 feed_forward1s_11_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.9p+5)]; + fp16 feed_forward1s_11_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward1s_11_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.8cp+5)]; + tensor clip_312_cast_fp16 = clip(alpha = feed_forward1s_11_ffw_layer_2_output_min_to_fp16, beta = feed_forward1s_11_ffw_layer_2_output_max_to_fp16, x = linear_123_cast_fp16)[name = string("clip_312_cast_fp16")]; + string clip_312_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_312_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_312_cast_fp16_to_fp32 = cast(dtype = clip_312_cast_fp16_to_fp32_dtype_0, x = clip_312_cast_fp16)[name = string("cast_39")]; + tensor clip_313 = clip(alpha = var_5062, beta = var_5061, x = clip_312_cast_fp16_to_fp32)[name = string("clip_313")]; + fp32 var_5056_promoted_1 = const()[name = string("op_5056_promoted_1"), val = fp32(0x1p+1)]; + tensor var_5102 = pow(x = clip_313, y = var_5056_promoted_1)[name = string("op_5102")]; + tensor var_5104_axes_0 = const()[name = string("op_5104_axes_0"), val = tensor([-1])]; + bool var_5104_keep_dims_0 = const()[name = string("op_5104_keep_dims_0"), val = bool(true)]; + tensor var_5104 = reduce_mean(axes = var_5104_axes_0, keep_dims = var_5104_keep_dims_0, x = var_5102)[name = string("op_5104")]; + string var_5104_to_fp16_dtype_0 = const()[name = string("op_5104_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5105_to_fp16 = const()[name = string("op_5105_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5104_to_fp16 = cast(dtype = var_5104_to_fp16_dtype_0, x = 
var_5104)[name = string("cast_38")]; + tensor mean_squared_201_cast_fp16 = add(x = var_5104_to_fp16, y = var_5105_to_fp16)[name = string("mean_squared_201_cast_fp16")]; + string mean_squared_201_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_201_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_201_cast_fp16_to_fp32 = cast(dtype = mean_squared_201_cast_fp16_to_fp32_dtype_0, x = mean_squared_201_cast_fp16)[name = string("cast_37")]; + tensor var_5107 = pow(x = mean_squared_201_cast_fp16_to_fp32, y = var_5060)[name = string("op_5107")]; + string clip_313_to_fp16_dtype_0 = const()[name = string("clip_313_to_fp16_dtype_0"), val = string("fp16")]; + string var_5107_to_fp16_dtype_0 = const()[name = string("op_5107_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_313_to_fp16 = cast(dtype = clip_313_to_fp16_dtype_0, x = clip_313)[name = string("cast_35")]; + tensor var_5107_to_fp16 = cast(dtype = var_5107_to_fp16_dtype_0, x = var_5107)[name = string("cast_36")]; + tensor normed_output_401_cast_fp16 = mul(x = clip_313_to_fp16, y = var_5107_to_fp16)[name = string("normed_output_401_cast_fp16")]; + tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138162112)))]; + tensor normed_output_403_cast_fp16 = mul(x = normed_output_401_cast_fp16, y = const_168_to_fp16)[name = string("normed_output_403_cast_fp16")]; + fp16 var_5052_to_fp16 = const()[name = string("op_5052_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_1155_cast_fp16 = mul(x = normed_output_403_cast_fp16, y = var_5052_to_fp16)[name = string("hidden_states_1155_cast_fp16")]; + tensor hidden_states_1157_cast_fp16 = add(x = hidden_states_1155_cast_fp16, y = normed_output_395_cast_fp16)[name = string("hidden_states_1157_cast_fp16")]; + fp16 var_5114_to_fp16 = const()[name = string("op_5114_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_5115_to_fp16 = 
const()[name = string("op_5115_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_314_cast_fp16 = clip(alpha = var_5114_to_fp16, beta = var_5115_to_fp16, x = hidden_states_1157_cast_fp16)[name = string("clip_314_cast_fp16")]; + string clip_314_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_314_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5117 = const()[name = string("op_5117"), val = fp32(-0x1p-1)]; + fp32 var_5121_promoted = const()[name = string("op_5121_promoted"), val = fp32(0x1p+1)]; + tensor clip_314_cast_fp16_to_fp32 = cast(dtype = clip_314_cast_fp16_to_fp32_dtype_0, x = clip_314_cast_fp16)[name = string("cast_34")]; + tensor var_5127 = pow(x = clip_314_cast_fp16_to_fp32, y = var_5121_promoted)[name = string("op_5127")]; + tensor var_5129_axes_0 = const()[name = string("op_5129_axes_0"), val = tensor([-1])]; + bool var_5129_keep_dims_0 = const()[name = string("op_5129_keep_dims_0"), val = bool(true)]; + tensor var_5129 = reduce_mean(axes = var_5129_axes_0, keep_dims = var_5129_keep_dims_0, x = var_5127)[name = string("op_5129")]; + string var_5129_to_fp16_dtype_0 = const()[name = string("op_5129_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5130_to_fp16 = const()[name = string("op_5130_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5129_to_fp16 = cast(dtype = var_5129_to_fp16_dtype_0, x = var_5129)[name = string("cast_33")]; + tensor mean_squared_203_cast_fp16 = add(x = var_5129_to_fp16, y = var_5130_to_fp16)[name = string("mean_squared_203_cast_fp16")]; + string mean_squared_203_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_203_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_203_cast_fp16_to_fp32 = cast(dtype = mean_squared_203_cast_fp16_to_fp32_dtype_0, x = mean_squared_203_cast_fp16)[name = string("cast_32")]; + tensor var_5132 = pow(x = mean_squared_203_cast_fp16_to_fp32, y = var_5117)[name = string("op_5132")]; + string var_5132_to_fp16_dtype_0 = const()[name = 
string("op_5132_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_5132_to_fp16 = cast(dtype = var_5132_to_fp16_dtype_0, x = var_5132)[name = string("cast_31")]; + tensor normed_output_405_cast_fp16 = mul(x = clip_314_cast_fp16, y = var_5132_to_fp16)[name = string("normed_output_405_cast_fp16")]; + tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138164224)))]; + tensor normed_output_407_cast_fp16 = mul(x = normed_output_405_cast_fp16, y = const_169_to_fp16)[name = string("normed_output_407_cast_fp16")]; + int32 var_5138 = const()[name = string("op_5138"), val = int32(-1)]; + fp32 var_5139 = const()[name = string("op_5139"), val = fp32(-0x1.dcd65p+29)]; + fp16 self_attns_11_q_proj_input_min_to_fp16 = const()[name = string("self_attns_11_q_proj_input_min_to_fp16"), val = fp16(-0x1.2cp+3)]; + fp16 self_attns_11_q_proj_input_max_to_fp16 = const()[name = string("self_attns_11_q_proj_input_max_to_fp16"), val = fp16(0x1.28p+3)]; + tensor clip_315_cast_fp16 = clip(alpha = self_attns_11_q_proj_input_min_to_fp16, beta = self_attns_11_q_proj_input_max_to_fp16, x = normed_output_407_cast_fp16)[name = string("clip_315_cast_fp16")]; + tensor self_attns_11_q_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138166336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138690688))))[name = string("self_attns_11_q_proj_linear_weight_to_fp16_palettized")]; + tensor linear_124_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_11_q_proj_linear_weight_to_fp16_palettized, x = clip_315_cast_fp16)[name = string("linear_124_cast_fp16")]; + fp16 self_attns_11_q_proj_output_min_to_fp16 = const()[name = string("self_attns_11_q_proj_output_min_to_fp16"), val = fp16(-0x1.12p+4)]; + fp16 
self_attns_11_q_proj_output_max_to_fp16 = const()[name = string("self_attns_11_q_proj_output_max_to_fp16"), val = fp16(0x1.1p+4)]; + tensor clip_316_cast_fp16 = clip(alpha = self_attns_11_q_proj_output_min_to_fp16, beta = self_attns_11_q_proj_output_max_to_fp16, x = linear_124_cast_fp16)[name = string("clip_316_cast_fp16")]; + tensor var_5183 = const()[name = string("op_5183"), val = tensor([1, 50, 8, 128])]; + tensor q_cast_fp16 = reshape(shape = var_5183, x = clip_316_cast_fp16)[name = string("q_cast_fp16")]; + tensor self_attns_11_k_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138691776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139216128))))[name = string("self_attns_11_k_proj_linear_weight_to_fp16_palettized")]; + tensor linear_125_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_11_k_proj_linear_weight_to_fp16_palettized, x = clip_315_cast_fp16)[name = string("linear_125_cast_fp16")]; + fp16 self_attns_11_k_proj_output_min_to_fp16 = const()[name = string("self_attns_11_k_proj_output_min_to_fp16"), val = fp16(-0x1.12p+4)]; + fp16 self_attns_11_k_proj_output_max_to_fp16 = const()[name = string("self_attns_11_k_proj_output_max_to_fp16"), val = fp16(0x1.1p+4)]; + tensor clip_318_cast_fp16 = clip(alpha = self_attns_11_k_proj_output_min_to_fp16, beta = self_attns_11_k_proj_output_max_to_fp16, x = linear_125_cast_fp16)[name = string("clip_318_cast_fp16")]; + tensor var_5195 = const()[name = string("op_5195"), val = tensor([1, 50, 8, 128])]; + tensor k_cast_fp16 = reshape(shape = var_5195, x = clip_318_cast_fp16)[name = string("k_cast_fp16")]; + tensor self_attns_11_v_proj_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139217216))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(139741568))))[name = string("self_attns_11_v_proj_linear_weight_to_fp16_palettized")]; + tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_11_v_proj_linear_weight_to_fp16_palettized, x = clip_315_cast_fp16)[name = string("linear_126_cast_fp16")]; + fp16 self_attns_11_v_proj_output_min_to_fp16 = const()[name = string("self_attns_11_v_proj_output_min_to_fp16"), val = fp16(-0x1.12p+4)]; + fp16 self_attns_11_v_proj_output_max_to_fp16 = const()[name = string("self_attns_11_v_proj_output_max_to_fp16"), val = fp16(0x1.1p+4)]; + tensor clip_320_cast_fp16 = clip(alpha = self_attns_11_v_proj_output_min_to_fp16, beta = self_attns_11_v_proj_output_max_to_fp16, x = linear_126_cast_fp16)[name = string("clip_320_cast_fp16")]; + tensor var_5207 = const()[name = string("op_5207"), val = tensor([1, 50, 8, 128])]; + tensor input_495_cast_fp16 = reshape(shape = var_5207, x = clip_320_cast_fp16)[name = string("input_495_cast_fp16")]; + fp16 var_5209_to_fp16 = const()[name = string("op_5209_to_fp16"), val = fp16(0x1.054p-3)]; + tensor var_5210_cast_fp16 = mul(x = q_cast_fp16, y = var_5209_to_fp16)[name = string("op_5210_cast_fp16")]; + tensor var_5211_to_fp16 = const()[name = string("op_5211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139742656)))]; + tensor input_491_cast_fp16 = mul(x = var_5210_cast_fp16, y = var_5211_to_fp16)[name = string("input_491_cast_fp16")]; + fp16 var_5213_to_fp16 = const()[name = string("op_5213_to_fp16"), val = fp16(0x1.e5p+0)]; + tensor input_493_cast_fp16 = mul(x = k_cast_fp16, y = var_5213_to_fp16)[name = string("input_493_cast_fp16")]; + tensor q_padded_pad_0 = const()[name = string("q_padded_pad_0"), val = tensor([0, 0, 0, 10, 0, 0, 0, 0])]; + string q_padded_mode_0 = const()[name = string("q_padded_mode_0"), val = string("constant")]; + fp16 const_170_to_fp16 = const()[name = string("const_170_to_fp16"), 
val = fp16(0x0p+0)]; + tensor q_padded_cast_fp16 = pad(constant_val = const_170_to_fp16, mode = q_padded_mode_0, pad = q_padded_pad_0, x = input_491_cast_fp16)[name = string("q_padded_cast_fp16")]; + tensor var_5217 = const()[name = string("op_5217"), val = tensor([1, 5, 12, 8, 128])]; + tensor q_blocks_cast_fp16 = reshape(shape = var_5217, x = q_padded_cast_fp16)[name = string("q_blocks_cast_fp16")]; + tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), val = string("constant")]; + fp16 const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_cast_fp16 = pad(constant_val = const_171_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_493_cast_fp16)[name = string("k_padded_cast_fp16")]; + tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 12, 11, 0, 0, 0, 0])]; + string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; + fp16 const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_cast_fp16 = pad(constant_val = const_172_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = input_495_cast_fp16)[name = string("v_padded_cast_fp16")]; + tensor var_5224_begin_0 = const()[name = string("op_5224_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5224_end_0 = const()[name = string("op_5224_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_5224_end_mask_0 = const()[name = string("op_5224_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5224_cast_fp16 = slice_by_index(begin = var_5224_begin_0, end = var_5224_end_0, end_mask = var_5224_end_mask_0, x = k_padded_cast_fp16)[name = string("op_5224_cast_fp16")]; + tensor var_5226_begin_0 = const()[name = string("op_5226_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_5226_end_0 = const()[name = 
string("op_5226_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_5226_end_mask_0 = const()[name = string("op_5226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5226_cast_fp16 = slice_by_index(begin = var_5226_begin_0, end = var_5226_end_0, end_mask = var_5226_end_mask_0, x = k_padded_cast_fp16)[name = string("op_5226_cast_fp16")]; + tensor var_5228_begin_0 = const()[name = string("op_5228_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_5228_end_0 = const()[name = string("op_5228_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_5228_end_mask_0 = const()[name = string("op_5228_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5228_cast_fp16 = slice_by_index(begin = var_5228_begin_0, end = var_5228_end_0, end_mask = var_5228_end_mask_0, x = k_padded_cast_fp16)[name = string("op_5228_cast_fp16")]; + tensor var_5230_begin_0 = const()[name = string("op_5230_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_5230_end_0 = const()[name = string("op_5230_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_5230_end_mask_0 = const()[name = string("op_5230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5230_cast_fp16 = slice_by_index(begin = var_5230_begin_0, end = var_5230_end_0, end_mask = var_5230_end_mask_0, x = k_padded_cast_fp16)[name = string("op_5230_cast_fp16")]; + tensor var_5232_begin_0 = const()[name = string("op_5232_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_5232_end_0 = const()[name = string("op_5232_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_5232_end_mask_0 = const()[name = string("op_5232_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5232_cast_fp16 = slice_by_index(begin = var_5232_begin_0, end = var_5232_end_0, end_mask = var_5232_end_mask_0, x = k_padded_cast_fp16)[name = string("op_5232_cast_fp16")]; + int32 k_blocks_axis_0 = const()[name = string("k_blocks_axis_0"), val = int32(1)]; + tensor k_blocks_cast_fp16 = stack(axis = 
k_blocks_axis_0, values = (var_5224_cast_fp16, var_5226_cast_fp16, var_5228_cast_fp16, var_5230_cast_fp16, var_5232_cast_fp16))[name = string("k_blocks_cast_fp16")]; + tensor var_5236_begin_0 = const()[name = string("op_5236_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5236_end_0 = const()[name = string("op_5236_end_0"), val = tensor([1, 24, 8, 128])]; + tensor var_5236_end_mask_0 = const()[name = string("op_5236_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5236_cast_fp16 = slice_by_index(begin = var_5236_begin_0, end = var_5236_end_0, end_mask = var_5236_end_mask_0, x = v_padded_cast_fp16)[name = string("op_5236_cast_fp16")]; + tensor var_5238_begin_0 = const()[name = string("op_5238_begin_0"), val = tensor([0, 12, 0, 0])]; + tensor var_5238_end_0 = const()[name = string("op_5238_end_0"), val = tensor([1, 36, 8, 128])]; + tensor var_5238_end_mask_0 = const()[name = string("op_5238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5238_cast_fp16 = slice_by_index(begin = var_5238_begin_0, end = var_5238_end_0, end_mask = var_5238_end_mask_0, x = v_padded_cast_fp16)[name = string("op_5238_cast_fp16")]; + tensor var_5240_begin_0 = const()[name = string("op_5240_begin_0"), val = tensor([0, 24, 0, 0])]; + tensor var_5240_end_0 = const()[name = string("op_5240_end_0"), val = tensor([1, 48, 8, 128])]; + tensor var_5240_end_mask_0 = const()[name = string("op_5240_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5240_cast_fp16 = slice_by_index(begin = var_5240_begin_0, end = var_5240_end_0, end_mask = var_5240_end_mask_0, x = v_padded_cast_fp16)[name = string("op_5240_cast_fp16")]; + tensor var_5242_begin_0 = const()[name = string("op_5242_begin_0"), val = tensor([0, 36, 0, 0])]; + tensor var_5242_end_0 = const()[name = string("op_5242_end_0"), val = tensor([1, 60, 8, 128])]; + tensor var_5242_end_mask_0 = const()[name = string("op_5242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_5242_cast_fp16 = slice_by_index(begin = var_5242_begin_0, end = var_5242_end_0, end_mask = var_5242_end_mask_0, x = v_padded_cast_fp16)[name = string("op_5242_cast_fp16")]; + tensor var_5244_begin_0 = const()[name = string("op_5244_begin_0"), val = tensor([0, 48, 0, 0])]; + tensor var_5244_end_0 = const()[name = string("op_5244_end_0"), val = tensor([1, 72, 8, 128])]; + tensor var_5244_end_mask_0 = const()[name = string("op_5244_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5244_cast_fp16 = slice_by_index(begin = var_5244_begin_0, end = var_5244_end_0, end_mask = var_5244_end_mask_0, x = v_padded_cast_fp16)[name = string("op_5244_cast_fp16")]; + int32 v_blocks_axis_0 = const()[name = string("v_blocks_axis_0"), val = int32(1)]; + tensor v_blocks_cast_fp16 = stack(axis = v_blocks_axis_0, values = (var_5236_cast_fp16, var_5238_cast_fp16, var_5240_cast_fp16, var_5242_cast_fp16, var_5244_cast_fp16))[name = string("v_blocks_cast_fp16")]; + tensor var_5252 = const()[name = string("op_5252"), val = tensor([0, 3, 1, -3, -1])]; + tensor var_5254 = const()[name = string("op_5254"), val = tensor([0, 3, 1, -1, -3])]; + bool matrix_ac_transpose_x_0 = const()[name = string("matrix_ac_transpose_x_0"), val = bool(false)]; + bool matrix_ac_transpose_y_0 = const()[name = string("matrix_ac_transpose_y_0"), val = bool(false)]; + tensor queries_cast_fp16 = transpose(perm = var_5252, x = q_blocks_cast_fp16)[name = string("transpose_4")]; + tensor keys_t_cast_fp16 = transpose(perm = var_5254, x = k_blocks_cast_fp16)[name = string("transpose_5")]; + tensor matrix_ac_cast_fp16 = matmul(transpose_x = matrix_ac_transpose_x_0, transpose_y = matrix_ac_transpose_y_0, x = queries_cast_fp16, y = keys_t_cast_fp16)[name = string("matrix_ac_cast_fp16")]; + tensor var_5257 = const()[name = string("op_5257"), val = tensor([1, 8, 60, 128])]; + tensor q_flat_cast_fp16 = reshape(shape = var_5257, x = queries_cast_fp16)[name = string("q_flat_cast_fp16")]; + bool 
matrix_bd_111_transpose_x_0 = const()[name = string("matrix_bd_111_transpose_x_0"), val = bool(false)]; + bool matrix_bd_111_transpose_y_0 = const()[name = string("matrix_bd_111_transpose_y_0"), val = bool(false)]; + tensor rel_k_t_to_fp16 = const()[name = string("rel_k_t_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139742976)))]; + tensor matrix_bd_111_cast_fp16 = matmul(transpose_x = matrix_bd_111_transpose_x_0, transpose_y = matrix_bd_111_transpose_y_0, x = q_flat_cast_fp16, y = rel_k_t_to_fp16)[name = string("matrix_bd_111_cast_fp16")]; + tensor var_5262 = const()[name = string("op_5262"), val = tensor([1, 8, 5, 12, 13])]; + tensor input_497_cast_fp16 = reshape(shape = var_5262, x = matrix_bd_111_cast_fp16)[name = string("input_497_cast_fp16")]; + tensor matrix_bd_113_pad_0 = const()[name = string("matrix_bd_113_pad_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139769664)))]; + string matrix_bd_113_mode_0 = const()[name = string("matrix_bd_113_mode_0"), val = string("constant")]; + fp16 const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = fp16(0x0p+0)]; + tensor matrix_bd_113_cast_fp16 = pad(constant_val = const_174_to_fp16, mode = matrix_bd_113_mode_0, pad = matrix_bd_113_pad_0, x = input_497_cast_fp16)[name = string("matrix_bd_113_cast_fp16")]; + tensor var_5266 = const()[name = string("op_5266"), val = tensor([1, 8, 5, 300])]; + tensor matrix_bd_115_cast_fp16 = reshape(shape = var_5266, x = matrix_bd_113_cast_fp16)[name = string("matrix_bd_115_cast_fp16")]; + tensor matrix_bd_117_begin_0 = const()[name = string("matrix_bd_117_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor matrix_bd_117_end_0 = const()[name = string("matrix_bd_117_end_0"), val = tensor([1, 8, 5, 288])]; + tensor matrix_bd_117_end_mask_0 = const()[name = string("matrix_bd_117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor matrix_bd_117_cast_fp16 = 
slice_by_index(begin = matrix_bd_117_begin_0, end = matrix_bd_117_end_0, end_mask = matrix_bd_117_end_mask_0, x = matrix_bd_115_cast_fp16)[name = string("matrix_bd_117_cast_fp16")]; + tensor var_5272 = const()[name = string("op_5272"), val = tensor([1, 8, 5, 12, 24])]; + tensor matrix_bd_cast_fp16 = reshape(shape = var_5272, x = matrix_bd_117_cast_fp16)[name = string("matrix_bd_cast_fp16")]; + tensor attn_67_cast_fp16 = add(x = matrix_ac_cast_fp16, y = matrix_bd_cast_fp16)[name = string("attn_67_cast_fp16")]; + fp16 _inversed_5275_y_0_to_fp16 = const()[name = string("_inversed_5275_y_0_to_fp16"), val = fp16(0x1.47cp-6)]; + tensor _inversed_5275_cast_fp16 = mul(x = attn_67_cast_fp16, y = _inversed_5275_y_0_to_fp16)[name = string("_inversed_5275_cast_fp16")]; + string _inversed_5275_cast_fp16_to_fp32_dtype_0 = const()[name = string("_inversed_5275_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor _inversed_5275_cast_fp16_to_fp32 = cast(dtype = _inversed_5275_cast_fp16_to_fp32_dtype_0, x = _inversed_5275_cast_fp16)[name = string("cast_30")]; + tensor var_5276 = tanh(x = _inversed_5275_cast_fp16_to_fp32)[name = string("op_5276")]; + string var_5276_to_fp16_dtype_0 = const()[name = string("op_5276_to_fp16_dtype_0"), val = string("fp16")]; + fp16 self_attns_11_softcap_to_fp16 = const()[name = string("self_attns_11_softcap_to_fp16"), val = fp16(0x1.9p+5)]; + tensor var_5276_to_fp16 = cast(dtype = var_5276_to_fp16_dtype_0, x = var_5276)[name = string("cast_29")]; + tensor attn_69_cast_fp16 = mul(x = var_5276_to_fp16, y = self_attns_11_softcap_to_fp16)[name = string("attn_69_cast_fp16")]; + string attn_69_cast_fp16_to_fp32_dtype_0 = const()[name = string("attn_69_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor attn_69_cast_fp16_to_fp32 = cast(dtype = attn_69_cast_fp16_to_fp32_dtype_0, x = attn_69_cast_fp16)[name = string("cast_28")]; + tensor input_499 = select(a = var_5139, b = attn_69_cast_fp16_to_fp32, cond = var_460)[name = string("input_499")]; 
+ tensor var_5280 = softmax(axis = var_5138, x = input_499)[name = string("op_5280")]; + tensor var_5282 = const()[name = string("op_5282"), val = tensor([0, 3, 1, -3, -1])]; + bool out_67_transpose_x_0 = const()[name = string("out_67_transpose_x_0"), val = bool(false)]; + bool out_67_transpose_y_0 = const()[name = string("out_67_transpose_y_0"), val = bool(false)]; + string var_5280_to_fp16_dtype_0 = const()[name = string("op_5280_to_fp16_dtype_0"), val = string("fp16")]; + tensor values_t_cast_fp16 = transpose(perm = var_5282, x = v_blocks_cast_fp16)[name = string("transpose_3")]; + tensor var_5280_to_fp16 = cast(dtype = var_5280_to_fp16_dtype_0, x = var_5280)[name = string("cast_27")]; + tensor out_67_cast_fp16 = matmul(transpose_x = out_67_transpose_x_0, transpose_y = out_67_transpose_y_0, x = var_5280_to_fp16, y = values_t_cast_fp16)[name = string("out_67_cast_fp16")]; + tensor var_5285 = const()[name = string("op_5285"), val = tensor([0, 2, 3, 1, 4])]; + tensor var_5287 = const()[name = string("op_5287"), val = tensor([1, 60, 1024])]; + tensor var_5286_cast_fp16 = transpose(perm = var_5285, x = out_67_cast_fp16)[name = string("transpose_2")]; + tensor out_69_cast_fp16 = reshape(shape = var_5287, x = var_5286_cast_fp16)[name = string("out_69_cast_fp16")]; + tensor var_5290_begin_0 = const()[name = string("op_5290_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5290_end_0 = const()[name = string("op_5290_end_0"), val = tensor([1, 50, 1024])]; + tensor var_5290_end_mask_0 = const()[name = string("op_5290_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5290_cast_fp16 = slice_by_index(begin = var_5290_begin_0, end = var_5290_end_0, end_mask = var_5290_end_mask_0, x = out_69_cast_fp16)[name = string("op_5290_cast_fp16")]; + fp16 self_attns_11_post_input_min_to_fp16 = const()[name = string("self_attns_11_post_input_min_to_fp16"), val = fp16(-0x1.eep+3)]; + fp16 self_attns_11_post_input_max_to_fp16 = const()[name = 
string("self_attns_11_post_input_max_to_fp16"), val = fp16(0x1.eap+3)]; + tensor clip_321_cast_fp16 = clip(alpha = self_attns_11_post_input_min_to_fp16, beta = self_attns_11_post_input_max_to_fp16, x = var_5290_cast_fp16)[name = string("clip_321_cast_fp16")]; + tensor self_attns_11_post_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139769792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140294144))))[name = string("self_attns_11_post_linear_weight_to_fp16_palettized")]; + tensor linear_128_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = self_attns_11_post_linear_weight_to_fp16_palettized, x = clip_321_cast_fp16)[name = string("linear_128_cast_fp16")]; + fp16 self_attns_11_post_output_min_to_fp16 = const()[name = string("self_attns_11_post_output_min_to_fp16"), val = fp16(-0x1.2p+6)]; + fp16 self_attns_11_post_output_max_to_fp16 = const()[name = string("self_attns_11_post_output_max_to_fp16"), val = fp16(0x1.1ep+6)]; + tensor clip_322_cast_fp16 = clip(alpha = self_attns_11_post_output_min_to_fp16, beta = self_attns_11_post_output_max_to_fp16, x = linear_128_cast_fp16)[name = string("clip_322_cast_fp16")]; + fp16 var_5302_to_fp16 = const()[name = string("op_5302_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_5303_to_fp16 = const()[name = string("op_5303_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_323_cast_fp16 = clip(alpha = var_5302_to_fp16, beta = var_5303_to_fp16, x = clip_322_cast_fp16)[name = string("clip_323_cast_fp16")]; + string clip_323_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_323_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5305 = const()[name = string("op_5305"), val = fp32(-0x1p-1)]; + fp32 var_5309_promoted = const()[name = string("op_5309_promoted"), val = fp32(0x1p+1)]; + tensor clip_323_cast_fp16_to_fp32 = cast(dtype = 
clip_323_cast_fp16_to_fp32_dtype_0, x = clip_323_cast_fp16)[name = string("cast_26")]; + tensor var_5315 = pow(x = clip_323_cast_fp16_to_fp32, y = var_5309_promoted)[name = string("op_5315")]; + tensor var_5317_axes_0 = const()[name = string("op_5317_axes_0"), val = tensor([-1])]; + bool var_5317_keep_dims_0 = const()[name = string("op_5317_keep_dims_0"), val = bool(true)]; + tensor var_5317 = reduce_mean(axes = var_5317_axes_0, keep_dims = var_5317_keep_dims_0, x = var_5315)[name = string("op_5317")]; + string var_5317_to_fp16_dtype_0 = const()[name = string("op_5317_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5318_to_fp16 = const()[name = string("op_5318_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5317_to_fp16 = cast(dtype = var_5317_to_fp16_dtype_0, x = var_5317)[name = string("cast_25")]; + tensor mean_squared_205_cast_fp16 = add(x = var_5317_to_fp16, y = var_5318_to_fp16)[name = string("mean_squared_205_cast_fp16")]; + string mean_squared_205_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_205_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_205_cast_fp16_to_fp32 = cast(dtype = mean_squared_205_cast_fp16_to_fp32_dtype_0, x = mean_squared_205_cast_fp16)[name = string("cast_24")]; + tensor var_5320 = pow(x = mean_squared_205_cast_fp16_to_fp32, y = var_5305)[name = string("op_5320")]; + string var_5320_to_fp16_dtype_0 = const()[name = string("op_5320_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_5320_to_fp16 = cast(dtype = var_5320_to_fp16_dtype_0, x = var_5320)[name = string("cast_23")]; + tensor normed_output_409_cast_fp16 = mul(x = clip_323_cast_fp16, y = var_5320_to_fp16)[name = string("normed_output_409_cast_fp16")]; + tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140295232)))]; + tensor normed_output_411_cast_fp16 = mul(x = normed_output_409_cast_fp16, y = const_175_to_fp16)[name = 
string("normed_output_411_cast_fp16")]; + tensor hidden_states_1183_cast_fp16 = add(x = normed_output_411_cast_fp16, y = hidden_states_1157_cast_fp16)[name = string("hidden_states_1183_cast_fp16")]; + string hidden_states_1183_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1183_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5327 = const()[name = string("op_5327"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_5328 = const()[name = string("op_5328"), val = fp32(-0x1.2a05f2p+33)]; + fp32 var_5340 = const()[name = string("op_5340"), val = fp32(-0x1p-1)]; + fp32 var_5336_promoted = const()[name = string("op_5336_promoted"), val = fp32(0x1p+1)]; + tensor hidden_states_1183_cast_fp16_to_fp32 = cast(dtype = hidden_states_1183_cast_fp16_to_fp32_dtype_0, x = hidden_states_1183_cast_fp16)[name = string("cast_22")]; + tensor var_5348 = pow(x = hidden_states_1183_cast_fp16_to_fp32, y = var_5336_promoted)[name = string("op_5348")]; + tensor var_5350_axes_0 = const()[name = string("op_5350_axes_0"), val = tensor([-1])]; + bool var_5350_keep_dims_0 = const()[name = string("op_5350_keep_dims_0"), val = bool(true)]; + tensor var_5350 = reduce_mean(axes = var_5350_axes_0, keep_dims = var_5350_keep_dims_0, x = var_5348)[name = string("op_5350")]; + string var_5350_to_fp16_dtype_0 = const()[name = string("op_5350_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5351_to_fp16 = const()[name = string("op_5351_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5350_to_fp16 = cast(dtype = var_5350_to_fp16_dtype_0, x = var_5350)[name = string("cast_21")]; + tensor mean_squared_207_cast_fp16 = add(x = var_5350_to_fp16, y = var_5351_to_fp16)[name = string("mean_squared_207_cast_fp16")]; + string mean_squared_207_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_207_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_207_cast_fp16_to_fp32 = cast(dtype = mean_squared_207_cast_fp16_to_fp32_dtype_0, x = 
mean_squared_207_cast_fp16)[name = string("cast_20")]; + tensor var_5353 = pow(x = mean_squared_207_cast_fp16_to_fp32, y = var_5340)[name = string("op_5353")]; + string var_5353_to_fp16_dtype_0 = const()[name = string("op_5353_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_5353_to_fp16 = cast(dtype = var_5353_to_fp16_dtype_0, x = var_5353)[name = string("cast_19")]; + tensor normed_output_413_cast_fp16 = mul(x = hidden_states_1183_cast_fp16, y = var_5353_to_fp16)[name = string("normed_output_413_cast_fp16")]; + tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140297344)))]; + tensor normed_output_415_cast_fp16 = mul(x = normed_output_413_cast_fp16, y = const_176_to_fp16)[name = string("normed_output_415_cast_fp16")]; + fp16 lconv1ds_11_linear_start_input_min_to_fp16 = const()[name = string("lconv1ds_11_linear_start_input_min_to_fp16"), val = fp16(-0x1.64p+3)]; + fp16 lconv1ds_11_linear_start_input_max_to_fp16 = const()[name = string("lconv1ds_11_linear_start_input_max_to_fp16"), val = fp16(0x1.6p+3)]; + tensor clip_324_cast_fp16 = clip(alpha = lconv1ds_11_linear_start_input_min_to_fp16, beta = lconv1ds_11_linear_start_input_max_to_fp16, x = normed_output_415_cast_fp16)[name = string("clip_324_cast_fp16")]; + tensor lconv1ds_11_linear_start_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140299456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141348096))))[name = string("lconv1ds_11_linear_start_linear_weight_to_fp16_palettized")]; + tensor linear_129_cast_fp16 = linear(bias = linear_8_bias_0_to_fp16, weight = lconv1ds_11_linear_start_linear_weight_to_fp16_palettized, x = clip_324_cast_fp16)[name = string("linear_129_cast_fp16")]; + fp16 lconv1ds_11_linear_start_output_min_to_fp16 = const()[name = 
string("lconv1ds_11_linear_start_output_min_to_fp16"), val = fp16(-0x1.b6p+4)]; + fp16 lconv1ds_11_linear_start_output_max_to_fp16 = const()[name = string("lconv1ds_11_linear_start_output_max_to_fp16"), val = fp16(0x1.b2p+4)]; + tensor clip_325_cast_fp16 = clip(alpha = lconv1ds_11_linear_start_output_min_to_fp16, beta = lconv1ds_11_linear_start_output_max_to_fp16, x = linear_129_cast_fp16)[name = string("clip_325_cast_fp16")]; + int32 hidden_states_1191_split_num_splits_0 = const()[name = string("hidden_states_1191_split_num_splits_0"), val = int32(2)]; + int32 hidden_states_1191_split_axis_0 = const()[name = string("hidden_states_1191_split_axis_0"), val = int32(-1)]; + tensor hidden_states_1191_split_cast_fp16_0, tensor hidden_states_1191_split_cast_fp16_1 = split(axis = hidden_states_1191_split_axis_0, num_splits = hidden_states_1191_split_num_splits_0, x = clip_325_cast_fp16)[name = string("hidden_states_1191_split_cast_fp16")]; + tensor hidden_states_1191_split_1_sigmoid_cast_fp16 = sigmoid(x = hidden_states_1191_split_cast_fp16_1)[name = string("hidden_states_1191_split_1_sigmoid_cast_fp16")]; + tensor hidden_states_1191_cast_fp16 = mul(x = hidden_states_1191_split_cast_fp16_0, y = hidden_states_1191_split_1_sigmoid_cast_fp16)[name = string("hidden_states_1191_cast_fp16")]; + tensor input_507_perm_0 = const()[name = string("input_507_perm_0"), val = tensor([0, 2, 1])]; + tensor input_509_pad_0 = const()[name = string("input_509_pad_0"), val = tensor([0, 0, 0, 0, 4, 0])]; + string input_509_mode_0 = const()[name = string("input_509_mode_0"), val = string("constant")]; + fp16 const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = fp16(0x0p+0)]; + tensor input_507_cast_fp16 = transpose(perm = input_507_perm_0, x = hidden_states_1191_cast_fp16)[name = string("transpose_1")]; + tensor input_509_cast_fp16 = pad(constant_val = const_177_to_fp16, mode = input_509_mode_0, pad = input_509_pad_0, x = input_507_cast_fp16)[name = 
string("input_509_cast_fp16")]; + string var_5379_pad_type_0 = const()[name = string("op_5379_pad_type_0"), val = string("valid")]; + int32 var_5379_groups_0 = const()[name = string("op_5379_groups_0"), val = int32(1024)]; + tensor var_5379_strides_0 = const()[name = string("op_5379_strides_0"), val = tensor([1])]; + tensor var_5379_pad_0 = const()[name = string("op_5379_pad_0"), val = tensor([0, 0])]; + tensor var_5379_dilations_0 = const()[name = string("op_5379_dilations_0"), val = tensor([1])]; + tensor lconv1ds_11_depthwise_conv1d_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141350208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141352832))))[name = string("lconv1ds_11_depthwise_conv1d_weight_to_fp16_palettized")]; + tensor var_5379_cast_fp16 = conv(dilations = var_5379_dilations_0, groups = var_5379_groups_0, pad = var_5379_pad_0, pad_type = var_5379_pad_type_0, strides = var_5379_strides_0, weight = lconv1ds_11_depthwise_conv1d_weight_to_fp16_palettized, x = input_509_cast_fp16)[name = string("op_5379_cast_fp16")]; + tensor hidden_states_1193_perm_0 = const()[name = string("hidden_states_1193_perm_0"), val = tensor([0, 2, 1])]; + string hidden_states_1193_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1193_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor hidden_states_1193_cast_fp16 = transpose(perm = hidden_states_1193_perm_0, x = var_5379_cast_fp16)[name = string("transpose_0")]; + tensor hidden_states_1193_cast_fp16_to_fp32 = cast(dtype = hidden_states_1193_cast_fp16_to_fp32_dtype_0, x = hidden_states_1193_cast_fp16)[name = string("cast_18")]; + tensor clip_326 = clip(alpha = var_5328, beta = var_5327, x = hidden_states_1193_cast_fp16_to_fp32)[name = string("clip_326")]; + fp32 var_5336_promoted_1 = const()[name = string("op_5336_promoted_1"), val = fp32(0x1p+1)]; + tensor var_5384 = 
pow(x = clip_326, y = var_5336_promoted_1)[name = string("op_5384")]; + tensor var_5386_axes_0 = const()[name = string("op_5386_axes_0"), val = tensor([-1])]; + bool var_5386_keep_dims_0 = const()[name = string("op_5386_keep_dims_0"), val = bool(true)]; + tensor var_5386 = reduce_mean(axes = var_5386_axes_0, keep_dims = var_5386_keep_dims_0, x = var_5384)[name = string("op_5386")]; + string var_5386_to_fp16_dtype_0 = const()[name = string("op_5386_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5387_to_fp16 = const()[name = string("op_5387_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5386_to_fp16 = cast(dtype = var_5386_to_fp16_dtype_0, x = var_5386)[name = string("cast_17")]; + tensor mean_squared_209_cast_fp16 = add(x = var_5386_to_fp16, y = var_5387_to_fp16)[name = string("mean_squared_209_cast_fp16")]; + string mean_squared_209_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_209_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_209_cast_fp16_to_fp32 = cast(dtype = mean_squared_209_cast_fp16_to_fp32_dtype_0, x = mean_squared_209_cast_fp16)[name = string("cast_16")]; + tensor var_5389 = pow(x = mean_squared_209_cast_fp16_to_fp32, y = var_5340)[name = string("op_5389")]; + string clip_326_to_fp16_dtype_0 = const()[name = string("clip_326_to_fp16_dtype_0"), val = string("fp16")]; + string var_5389_to_fp16_dtype_0 = const()[name = string("op_5389_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_326_to_fp16 = cast(dtype = clip_326_to_fp16_dtype_0, x = clip_326)[name = string("cast_14")]; + tensor var_5389_to_fp16 = cast(dtype = var_5389_to_fp16_dtype_0, x = var_5389)[name = string("cast_15")]; + tensor normed_output_417_cast_fp16 = mul(x = clip_326_to_fp16, y = var_5389_to_fp16)[name = string("normed_output_417_cast_fp16")]; + tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141353920)))]; + tensor 
normed_output_419_cast_fp16 = mul(x = normed_output_417_cast_fp16, y = const_178_to_fp16)[name = string("normed_output_419_cast_fp16")]; + tensor hidden_states_1199_cast_fp16 = silu(x = normed_output_419_cast_fp16)[name = string("hidden_states_1199_cast_fp16")]; + fp16 lconv1ds_11_linear_end_input_min_to_fp16 = const()[name = string("lconv1ds_11_linear_end_input_min_to_fp16"), val = fp16(-0x1.e6p+4)]; + fp16 lconv1ds_11_linear_end_input_max_to_fp16 = const()[name = string("lconv1ds_11_linear_end_input_max_to_fp16"), val = fp16(0x1.e2p+4)]; + tensor clip_327_cast_fp16 = clip(alpha = lconv1ds_11_linear_end_input_min_to_fp16, beta = lconv1ds_11_linear_end_input_max_to_fp16, x = hidden_states_1199_cast_fp16)[name = string("clip_327_cast_fp16")]; + tensor lconv1ds_11_linear_end_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141356032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141880384))))[name = string("lconv1ds_11_linear_end_linear_weight_to_fp16_palettized")]; + tensor linear_130_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = lconv1ds_11_linear_end_linear_weight_to_fp16_palettized, x = clip_327_cast_fp16)[name = string("linear_130_cast_fp16")]; + fp16 lconv1ds_11_linear_end_output_min_to_fp16 = const()[name = string("lconv1ds_11_linear_end_output_min_to_fp16"), val = fp16(-0x1.06p+4)]; + fp16 lconv1ds_11_linear_end_output_max_to_fp16 = const()[name = string("lconv1ds_11_linear_end_output_max_to_fp16"), val = fp16(0x1.04p+4)]; + tensor clip_328_cast_fp16 = clip(alpha = lconv1ds_11_linear_end_output_min_to_fp16, beta = lconv1ds_11_linear_end_output_max_to_fp16, x = linear_130_cast_fp16)[name = string("clip_328_cast_fp16")]; + tensor hidden_states_1205_cast_fp16 = add(x = clip_328_cast_fp16, y = hidden_states_1183_cast_fp16)[name = string("hidden_states_1205_cast_fp16")]; + string 
hidden_states_1205_cast_fp16_to_fp32_dtype_0 = const()[name = string("hidden_states_1205_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5413 = const()[name = string("op_5413"), val = fp32(-0x1p-1)]; + fp32 var_5414 = const()[name = string("op_5414"), val = fp32(0x1.2a05f2p+33)]; + fp32 var_5415 = const()[name = string("op_5415"), val = fp32(-0x1.2a05f2p+33)]; + tensor hidden_states_1205_cast_fp16_to_fp32 = cast(dtype = hidden_states_1205_cast_fp16_to_fp32_dtype_0, x = hidden_states_1205_cast_fp16)[name = string("cast_13")]; + tensor clip_329 = clip(alpha = var_5415, beta = var_5414, x = hidden_states_1205_cast_fp16_to_fp32)[name = string("clip_329")]; + fp32 var_5409_promoted = const()[name = string("op_5409_promoted"), val = fp32(0x1p+1)]; + tensor var_5423 = pow(x = clip_329, y = var_5409_promoted)[name = string("op_5423")]; + tensor var_5425_axes_0 = const()[name = string("op_5425_axes_0"), val = tensor([-1])]; + bool var_5425_keep_dims_0 = const()[name = string("op_5425_keep_dims_0"), val = bool(true)]; + tensor var_5425 = reduce_mean(axes = var_5425_axes_0, keep_dims = var_5425_keep_dims_0, x = var_5423)[name = string("op_5425")]; + string var_5425_to_fp16_dtype_0 = const()[name = string("op_5425_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5425_to_fp16 = cast(dtype = var_5425_to_fp16_dtype_0, x = var_5425)[name = string("cast_12")]; + tensor mean_squared_211_cast_fp16 = add(x = var_5425_to_fp16, y = var_5426_to_fp16)[name = string("mean_squared_211_cast_fp16")]; + string mean_squared_211_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_211_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_211_cast_fp16_to_fp32 = cast(dtype = mean_squared_211_cast_fp16_to_fp32_dtype_0, x = mean_squared_211_cast_fp16)[name = string("cast_11")]; + tensor var_5428 = pow(x = mean_squared_211_cast_fp16_to_fp32, y = 
var_5413)[name = string("op_5428")]; + string clip_329_to_fp16_dtype_0 = const()[name = string("clip_329_to_fp16_dtype_0"), val = string("fp16")]; + string var_5428_to_fp16_dtype_0 = const()[name = string("op_5428_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_329_to_fp16 = cast(dtype = clip_329_to_fp16_dtype_0, x = clip_329)[name = string("cast_9")]; + tensor var_5428_to_fp16 = cast(dtype = var_5428_to_fp16_dtype_0, x = var_5428)[name = string("cast_10")]; + tensor normed_output_421_cast_fp16 = mul(x = clip_329_to_fp16, y = var_5428_to_fp16)[name = string("normed_output_421_cast_fp16")]; + tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141881472)))]; + tensor normed_output_423_cast_fp16 = mul(x = normed_output_421_cast_fp16, y = const_179_to_fp16)[name = string("normed_output_423_cast_fp16")]; + fp16 feed_forward2s_11_ffw_layer_1_input_min_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_1_input_min_to_fp16"), val = fp16(-0x1.9cp+3)]; + fp16 feed_forward2s_11_ffw_layer_1_input_max_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_1_input_max_to_fp16"), val = fp16(0x1.98p+3)]; + tensor clip_330_cast_fp16 = clip(alpha = feed_forward2s_11_ffw_layer_1_input_min_to_fp16, beta = feed_forward2s_11_ffw_layer_1_input_max_to_fp16, x = normed_output_423_cast_fp16)[name = string("clip_330_cast_fp16")]; + tensor feed_forward2s_11_ffw_layer_1_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141883584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143980800))))[name = string("feed_forward2s_11_ffw_layer_1_linear_weight_to_fp16_palettized")]; + tensor linear_131_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = feed_forward2s_11_ffw_layer_1_linear_weight_to_fp16_palettized, x = 
clip_330_cast_fp16)[name = string("linear_131_cast_fp16")]; + fp16 feed_forward2s_11_ffw_layer_1_output_min_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_1_output_min_to_fp16"), val = fp16(-0x1.44p+5)]; + fp16 feed_forward2s_11_ffw_layer_1_output_max_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_1_output_max_to_fp16"), val = fp16(0x1.4p+5)]; + tensor clip_331_cast_fp16 = clip(alpha = feed_forward2s_11_ffw_layer_1_output_min_to_fp16, beta = feed_forward2s_11_ffw_layer_1_output_max_to_fp16, x = linear_131_cast_fp16)[name = string("clip_331_cast_fp16")]; + tensor hidden_states_1215_cast_fp16 = silu(x = clip_331_cast_fp16)[name = string("hidden_states_1215_cast_fp16")]; + fp16 feed_forward2s_11_ffw_layer_2_input_min_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_2_input_min_to_fp16"), val = fp16(-0x1.6ap+3)]; + fp16 feed_forward2s_11_ffw_layer_2_input_max_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_2_input_max_to_fp16"), val = fp16(0x1.66p+3)]; + tensor clip_332_cast_fp16 = clip(alpha = feed_forward2s_11_ffw_layer_2_input_min_to_fp16, beta = feed_forward2s_11_ffw_layer_2_input_max_to_fp16, x = hidden_states_1215_cast_fp16)[name = string("clip_332_cast_fp16")]; + tensor feed_forward2s_11_ffw_layer_2_linear_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143984960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146082176))))[name = string("feed_forward2s_11_ffw_layer_2_linear_weight_to_fp16_palettized")]; + tensor linear_132_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = feed_forward2s_11_ffw_layer_2_linear_weight_to_fp16_palettized, x = clip_332_cast_fp16)[name = string("linear_132_cast_fp16")]; + fp16 feed_forward2s_11_ffw_layer_2_output_min_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_2_output_min_to_fp16"), val = fp16(-0x1.c4p+5)]; + fp16 
feed_forward2s_11_ffw_layer_2_output_max_to_fp16 = const()[name = string("feed_forward2s_11_ffw_layer_2_output_max_to_fp16"), val = fp16(0x1.cp+5)]; + tensor clip_333_cast_fp16 = clip(alpha = feed_forward2s_11_ffw_layer_2_output_min_to_fp16, beta = feed_forward2s_11_ffw_layer_2_output_max_to_fp16, x = linear_132_cast_fp16)[name = string("clip_333_cast_fp16")]; + string clip_333_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_333_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor clip_333_cast_fp16_to_fp32 = cast(dtype = clip_333_cast_fp16_to_fp32_dtype_0, x = clip_333_cast_fp16)[name = string("cast_8")]; + tensor clip_334 = clip(alpha = var_5415, beta = var_5414, x = clip_333_cast_fp16_to_fp32)[name = string("clip_334")]; + fp32 var_5409_promoted_1 = const()[name = string("op_5409_promoted_1"), val = fp32(0x1p+1)]; + tensor var_5455 = pow(x = clip_334, y = var_5409_promoted_1)[name = string("op_5455")]; + tensor var_5457_axes_0 = const()[name = string("op_5457_axes_0"), val = tensor([-1])]; + bool var_5457_keep_dims_0 = const()[name = string("op_5457_keep_dims_0"), val = bool(true)]; + tensor var_5457 = reduce_mean(axes = var_5457_axes_0, keep_dims = var_5457_keep_dims_0, x = var_5455)[name = string("op_5457")]; + string var_5457_to_fp16_dtype_0 = const()[name = string("op_5457_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5458_to_fp16 = const()[name = string("op_5458_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5457_to_fp16 = cast(dtype = var_5457_to_fp16_dtype_0, x = var_5457)[name = string("cast_7")]; + tensor mean_squared_213_cast_fp16 = add(x = var_5457_to_fp16, y = var_5458_to_fp16)[name = string("mean_squared_213_cast_fp16")]; + string mean_squared_213_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_213_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_213_cast_fp16_to_fp32 = cast(dtype = mean_squared_213_cast_fp16_to_fp32_dtype_0, x = mean_squared_213_cast_fp16)[name = string("cast_6")]; + 
tensor var_5460 = pow(x = mean_squared_213_cast_fp16_to_fp32, y = var_5413)[name = string("op_5460")]; + string clip_334_to_fp16_dtype_0 = const()[name = string("clip_334_to_fp16_dtype_0"), val = string("fp16")]; + string var_5460_to_fp16_dtype_0 = const()[name = string("op_5460_to_fp16_dtype_0"), val = string("fp16")]; + tensor clip_334_to_fp16 = cast(dtype = clip_334_to_fp16_dtype_0, x = clip_334)[name = string("cast_4")]; + tensor var_5460_to_fp16 = cast(dtype = var_5460_to_fp16_dtype_0, x = var_5460)[name = string("cast_5")]; + tensor normed_output_425_cast_fp16 = mul(x = clip_334_to_fp16, y = var_5460_to_fp16)[name = string("normed_output_425_cast_fp16")]; + tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146083264)))]; + tensor normed_output_427_cast_fp16 = mul(x = normed_output_425_cast_fp16, y = const_180_to_fp16)[name = string("normed_output_427_cast_fp16")]; + fp16 var_5405_to_fp16 = const()[name = string("op_5405_to_fp16"), val = fp16(0x1p-1)]; + tensor hidden_states_1227_cast_fp16 = mul(x = normed_output_427_cast_fp16, y = var_5405_to_fp16)[name = string("hidden_states_1227_cast_fp16")]; + tensor hidden_states_1229_cast_fp16 = add(x = hidden_states_1227_cast_fp16, y = hidden_states_1205_cast_fp16)[name = string("hidden_states_1229_cast_fp16")]; + fp16 var_5467_to_fp16 = const()[name = string("op_5467_to_fp16"), val = fp16(-0x1.ffcp+15)]; + fp16 var_5468_to_fp16 = const()[name = string("op_5468_to_fp16"), val = fp16(0x1.ffcp+15)]; + tensor clip_335_cast_fp16 = clip(alpha = var_5467_to_fp16, beta = var_5468_to_fp16, x = hidden_states_1229_cast_fp16)[name = string("clip_335_cast_fp16")]; + string clip_335_cast_fp16_to_fp32_dtype_0 = const()[name = string("clip_335_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + fp32 var_5470 = const()[name = string("op_5470"), val = fp32(-0x1p-1)]; + fp32 var_5474_promoted = const()[name = 
string("op_5474_promoted"), val = fp32(0x1p+1)]; + tensor clip_335_cast_fp16_to_fp32 = cast(dtype = clip_335_cast_fp16_to_fp32_dtype_0, x = clip_335_cast_fp16)[name = string("cast_3")]; + tensor var_5480 = pow(x = clip_335_cast_fp16_to_fp32, y = var_5474_promoted)[name = string("op_5480")]; + tensor var_5482_axes_0 = const()[name = string("op_5482_axes_0"), val = tensor([-1])]; + bool var_5482_keep_dims_0 = const()[name = string("op_5482_keep_dims_0"), val = bool(true)]; + tensor var_5482 = reduce_mean(axes = var_5482_axes_0, keep_dims = var_5482_keep_dims_0, x = var_5480)[name = string("op_5482")]; + string var_5482_to_fp16_dtype_0 = const()[name = string("op_5482_to_fp16_dtype_0"), val = string("fp16")]; + fp16 var_5483_to_fp16 = const()[name = string("op_5483_to_fp16"), val = fp16(0x1.1p-20)]; + tensor var_5482_to_fp16 = cast(dtype = var_5482_to_fp16_dtype_0, x = var_5482)[name = string("cast_2")]; + tensor mean_squared_cast_fp16 = add(x = var_5482_to_fp16, y = var_5483_to_fp16)[name = string("mean_squared_cast_fp16")]; + string mean_squared_cast_fp16_to_fp32_dtype_0 = const()[name = string("mean_squared_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; + tensor mean_squared_cast_fp16_to_fp32 = cast(dtype = mean_squared_cast_fp16_to_fp32_dtype_0, x = mean_squared_cast_fp16)[name = string("cast_1")]; + tensor var_5485 = pow(x = mean_squared_cast_fp16_to_fp32, y = var_5470)[name = string("op_5485")]; + string var_5485_to_fp16_dtype_0 = const()[name = string("op_5485_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_5485_to_fp16 = cast(dtype = var_5485_to_fp16_dtype_0, x = var_5485)[name = string("cast_0")]; + tensor normed_output_429_cast_fp16 = mul(x = clip_335_cast_fp16, y = var_5485_to_fp16)[name = string("normed_output_429_cast_fp16")]; + tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146085376)))]; + tensor hidden_states = mul(x = 
normed_output_429_cast_fp16, y = const_181_to_fp16)[name = string("normed_output_cast_fp16")]; + } -> (hidden_states); +} \ No newline at end of file diff --git a/audio.mlmodelc/weights/weight.bin b/audio.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e2b7538355d63833c6daf4be53fe1f81193b8f1 --- /dev/null +++ b/audio.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37da916ac6ac7911266a9c7532a681e4039aea7ce13bf570d80636b705dc6163 +size 146087488 diff --git a/audio_config.json b/audio_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3b0fb4bca21c5d65a031d6906b7bc04d2cc6d921 --- /dev/null +++ b/audio_config.json @@ -0,0 +1,19 @@ +{ + "sampling_rate": 16000, + "feature_size": 128, + "frame_length": 320, + "hop_length": 160, + "fft_length": 512, + "mel_floor": 1e-05, + "min_frequency": 0, + "max_frequency": 8000, + "log_offset": 0.001, + "preemphasis": 0.97, + "mel_frames": 200, + "num_tokens": 50, + "audio_token_id": 258881, + "boa_token_id": 256000, + "eoa_token_id": 258883, + "ms_per_token": 40, + "quantization": "int4" +} \ No newline at end of file diff --git a/chunk1.mlmodelc/analytics/coremldata.bin b/chunk1.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..0abffef7bd44682e24e07c8d917dc03a343cbb1f --- /dev/null +++ b/chunk1.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7d0c074925f5e2b23d70754135276d3b38e5bb2ebf89df153a401e37ef2f57 +size 243 diff --git a/chunk1.mlmodelc/coremldata.bin b/chunk1.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..913f44ce44134bd04ea4c8694f887d628cf14ff2 --- /dev/null +++ b/chunk1.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b53fc92f6d11bf88eb63b9a7af4a7211180e3c031115a9abfa20655814727d4 +size 1333 
diff --git a/chunk1.mlmodelc/model.mil b/chunk1.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..5e2149d2cd1def034692619945c7c51459a61b0e --- /dev/null +++ b/chunk1.mlmodelc/model.mil @@ -0,0 +1,8435 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func decode_q1(tensor K_full_in, tensor K_sliding_in, tensor V_full_in, tensor V_sliding_in, tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor per_layer_raw, tensor sin_f, tensor sin_s, tensor update_mask) { + tensor per_layer_model_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13762688))))[name = string("per_layer_model_projection_weight_palettized")]; + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13773504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16395008))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16397120)))]; + tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16397696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17053120))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; + tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(17053696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17709120))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; + tensor layers_0_self_attn_k_norm_weight = const()[name = string("layers_0_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17709696)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17710272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30817536))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30827840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43935104))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43945408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57052672))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57055296)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57060480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57388224))))[name = 
string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57388544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60010048))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_self_attn_q_norm_weight = const()[name = string("layers_1_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60012160)))]; + tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60012736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60668160))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; + tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60668736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61324160))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; + tensor layers_1_self_attn_k_norm_weight = const()[name = string("layers_1_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61324736)))]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61325312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74432576))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(74442880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87550144))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87560448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100667712))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100670336)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100675520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101003264))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101003584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103625088))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103627200)))]; + tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103627776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104283200))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; + 
tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104283776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104939200))))[name = string("layers_2_self_attn_v_proj_weight_palettized")]; + tensor layers_2_self_attn_k_norm_weight = const()[name = string("layers_2_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104939776)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104940352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118047616))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118057920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131165184))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131175488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144282752))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144285376)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144290560))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144618304))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144618624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147240128))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_self_attn_q_norm_weight = const()[name = string("layers_3_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147242240)))]; + tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147242816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147898240))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; + tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147898816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148554240))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; + tensor layers_3_self_attn_k_norm_weight = const()[name = string("layers_3_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148554816)))]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148555392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161662656))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor 
layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161672960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174780224))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174790528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187897792))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187900416)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187905600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188233344))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188233664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190855168))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190857280)))]; + tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190857856))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191513280))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; + tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191513856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192169280))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; + tensor layers_4_self_attn_k_norm_weight = const()[name = string("layers_4_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192169856)))]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192170432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205277696))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205288000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218395264))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218405568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231512832))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231515456)))]; + tensor 
layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231520640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231848384))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231848704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237091648))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_self_attn_q_norm_weight = const()[name = string("layers_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237095808)))]; + tensor layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237096896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238407680))))[name = string("layers_5_self_attn_k_proj_weight_palettized")]; + tensor layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238408768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239719552))))[name = string("layers_5_self_attn_v_proj_weight_palettized")]; + tensor layers_5_self_attn_k_norm_weight = const()[name = string("layers_5_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239720640)))]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239721728))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252828992))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252839296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265946560))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265956864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279064128))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279066752)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279071936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279399680))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279400000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282021504))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282023616))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(282679040))))[name = string("layers_6_self_attn_k_proj_weight_palettized")]; + tensor layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282679616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283335040))))[name = string("layers_6_self_attn_v_proj_weight_palettized")]; + tensor layers_6_self_attn_k_norm_weight = const()[name = string("layers_6_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283335616)))]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283336192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296443456))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296453760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309561024))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309571328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322678592))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322681216)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322686400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323014144))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323014464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325635968))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_self_attn_q_norm_weight = const()[name = string("layers_7_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325638080)))]; + tensor layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325638656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326294080))))[name = string("layers_7_self_attn_k_proj_weight_palettized")]; + tensor layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326294656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326950080))))[name = string("layers_7_self_attn_v_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326950656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340057920))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(340068224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353175488))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353185792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366293056))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366295680)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366300864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366628608))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366628928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369250432))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_self_attn_q_norm_weight = const()[name = string("layers_8_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369252544)))]; + tensor layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369253120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369908544))))[name = 
string("layers_8_self_attn_k_proj_weight_palettized")]; + tensor layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369909120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370564544))))[name = string("layers_8_self_attn_v_proj_weight_palettized")]; + tensor layers_8_self_attn_k_norm_weight = const()[name = string("layers_8_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370565120)))]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370565696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383672960))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383683264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396790528))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396800832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409908096))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409910720)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(409915904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410243648))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + tensor layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410243968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412865472))))[name = string("layers_9_self_attn_q_proj_weight_palettized")]; + tensor layers_9_self_attn_q_norm_weight = const()[name = string("layers_9_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412867584)))]; + tensor layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412868160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413523584))))[name = string("layers_9_self_attn_k_proj_weight_palettized")]; + tensor layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413524160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414179584))))[name = string("layers_9_self_attn_v_proj_weight_palettized")]; + tensor layers_9_self_attn_k_norm_weight = const()[name = string("layers_9_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414180160)))]; + tensor layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414180736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427288000))))[name = 
string("layers_9_mlp_gate_proj_weight_palettized")]; + tensor layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427298304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440405568))))[name = string("layers_9_mlp_up_proj_weight_palettized")]; + tensor layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440415872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453523136))))[name = string("layers_9_mlp_down_proj_weight_palettized")]; + tensor layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453525760)))]; + tensor layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453530944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453858688))))[name = string("layers_9_per_layer_input_gate_weight_palettized")]; + tensor layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453859008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456480512))))[name = string("layers_10_self_attn_q_proj_weight_palettized")]; + tensor layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456482624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457138048))))[name = 
string("layers_10_self_attn_k_proj_weight_palettized")]; + tensor layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457138624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457794048))))[name = string("layers_10_self_attn_v_proj_weight_palettized")]; + tensor layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457794624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470901888))))[name = string("layers_10_mlp_gate_proj_weight_palettized")]; + tensor layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470912192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484019456))))[name = string("layers_10_mlp_up_proj_weight_palettized")]; + tensor layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484029760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497137024))))[name = string("layers_10_mlp_down_proj_weight_palettized")]; + tensor layers_10_post_feedforward_layernorm_weight = const()[name = string("layers_10_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497139648)))]; + tensor layers_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497144832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497472576))))[name = 
string("layers_10_per_layer_input_gate_weight_palettized")]; + tensor layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497472896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502715840))))[name = string("layers_11_self_attn_q_proj_weight_palettized")]; + tensor layers_11_self_attn_q_norm_weight = const()[name = string("layers_11_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502720000)))]; + tensor layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502721088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504031872))))[name = string("layers_11_self_attn_k_proj_weight_palettized")]; + tensor layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504032960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505343744))))[name = string("layers_11_self_attn_v_proj_weight_palettized")]; + tensor layers_11_self_attn_k_norm_weight = const()[name = string("layers_11_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505344832)))]; + tensor layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505345920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518453184))))[name = string("layers_11_mlp_gate_proj_weight_palettized")]; + tensor layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(518463488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531570752))))[name = string("layers_11_mlp_up_proj_weight_palettized")]; + tensor layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531581056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544688320))))[name = string("layers_11_mlp_down_proj_weight_palettized")]; + tensor layers_11_post_feedforward_layernorm_weight = const()[name = string("layers_11_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544690944)))]; + tensor layers_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544696128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545023872))))[name = string("layers_11_per_layer_input_gate_weight_palettized")]; + tensor var_740 = const()[name = string("op_740"), val = tensor([0, 2, 1])]; + tensor var_743_axes_0 = const()[name = string("op_743_axes_0"), val = tensor([2])]; + tensor var_741_cast_fp16 = transpose(perm = var_740, x = hidden_states)[name = string("transpose_217")]; + tensor var_743_cast_fp16 = expand_dims(axes = var_743_axes_0, x = var_741_cast_fp16)[name = string("op_743_cast_fp16")]; + string var_759_pad_type_0 = const()[name = string("op_759_pad_type_0"), val = string("valid")]; + tensor var_759_strides_0 = const()[name = string("op_759_strides_0"), val = tensor([1, 1])]; + tensor var_759_pad_0 = const()[name = string("op_759_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_759_dilations_0 = const()[name = string("op_759_dilations_0"), val = tensor([1, 1])]; + int32 var_759_groups_0 = const()[name = 
string("op_759_groups_0"), val = int32(1)]; + tensor var_759 = conv(dilations = var_759_dilations_0, groups = var_759_groups_0, pad = var_759_pad_0, pad_type = var_759_pad_type_0, strides = var_759_strides_0, weight = per_layer_model_projection_weight_palettized, x = var_743_cast_fp16)[name = string("op_759")]; + fp16 var_760_to_fp16 = const()[name = string("op_760_to_fp16"), val = fp16(0x1.43cp-6)]; + tensor proj_1_cast_fp16 = mul(x = var_759, y = var_760_to_fp16)[name = string("proj_1_cast_fp16")]; + tensor var_763_axes_0 = const()[name = string("op_763_axes_0"), val = tensor([2])]; + tensor var_763_cast_fp16 = squeeze(axes = var_763_axes_0, x = proj_1_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor var_767 = const()[name = string("op_767"), val = tensor([0, 2, 1])]; + tensor var_772 = const()[name = string("op_772"), val = tensor([1, 42, 256])]; + tensor proj_cast_fp16 = transpose(perm = var_767, x = var_763_cast_fp16)[name = string("transpose_216")]; + tensor proj_grouped_cast_fp16 = reshape(shape = var_772, x = proj_cast_fp16)[name = string("proj_grouped_cast_fp16")]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_774_cast_fp16 = mul(x = proj_grouped_cast_fp16, y = const_0_promoted_to_fp16)[name = string("op_774_cast_fp16")]; + int32 var_776 = const()[name = string("op_776"), val = int32(-1)]; + bool input_3_interleave_0 = const()[name = string("input_3_interleave_0"), val = bool(false)]; + tensor input_3_cast_fp16 = concat(axis = var_776, interleave = input_3_interleave_0, values = (proj_grouped_cast_fp16, var_774_cast_fp16))[name = string("input_3_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_782_to_fp16 = const()[name = string("op_782_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_782_to_fp16, x = input_3_cast_fp16)[name = 
string("normed_1_cast_fp16")]; + tensor var_785_split_sizes_0 = const()[name = string("op_785_split_sizes_0"), val = tensor([256, 256])]; + int32 var_785_axis_0 = const()[name = string("op_785_axis_0"), val = int32(-1)]; + tensor var_785_cast_fp16_0, tensor var_785_cast_fp16_1 = split(axis = var_785_axis_0, split_sizes = var_785_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_785_cast_fp16")]; + tensor per_layer_projection_norm_weight_promoted_to_fp16 = const()[name = string("per_layer_projection_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545024192)))]; + tensor var_787_cast_fp16 = mul(x = var_785_cast_fp16_0, y = per_layer_projection_norm_weight_promoted_to_fp16)[name = string("op_787_cast_fp16")]; + tensor var_791 = const()[name = string("op_791"), val = tensor([1, 1, 10752])]; + tensor proj_normed_cast_fp16 = reshape(shape = var_791, x = var_787_cast_fp16)[name = string("proj_normed_cast_fp16")]; + tensor var_794_cast_fp16 = add(x = proj_normed_cast_fp16, y = per_layer_raw)[name = string("op_794_cast_fp16")]; + fp16 var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = fp16(0x1.6ap-1)]; + tensor per_layer_combined_out = mul(x = var_794_cast_fp16, y = var_795_to_fp16)[name = string("per_layer_combined_cast_fp16")]; + tensor var_799_begin_0 = const()[name = string("op_799_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_799_end_0 = const()[name = string("op_799_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_799_end_mask_0 = const()[name = string("op_799_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_799_squeeze_mask_0 = const()[name = string("op_799_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_799_cast_fp16 = slice_by_index(begin = var_799_begin_0, end = var_799_end_0, end_mask = var_799_end_mask_0, squeeze_mask = var_799_squeeze_mask_0, x = K_sliding_in)[name = string("op_799_cast_fp16")]; + tensor 
K_sliding_slot_1_axes_0 = const()[name = string("K_sliding_slot_1_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_1_cast_fp16 = expand_dims(axes = K_sliding_slot_1_axes_0, x = var_799_cast_fp16)[name = string("K_sliding_slot_1_cast_fp16")]; + tensor var_804_begin_0 = const()[name = string("op_804_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_804_end_0 = const()[name = string("op_804_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_804_end_mask_0 = const()[name = string("op_804_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_804_squeeze_mask_0 = const()[name = string("op_804_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_804_cast_fp16 = slice_by_index(begin = var_804_begin_0, end = var_804_end_0, end_mask = var_804_end_mask_0, squeeze_mask = var_804_squeeze_mask_0, x = V_sliding_in)[name = string("op_804_cast_fp16")]; + tensor V_sliding_slot_1_axes_0 = const()[name = string("V_sliding_slot_1_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_1_cast_fp16 = expand_dims(axes = V_sliding_slot_1_axes_0, x = var_804_cast_fp16)[name = string("V_sliding_slot_1_cast_fp16")]; + int32 var_811 = const()[name = string("op_811"), val = int32(-1)]; + fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_813_cast_fp16 = mul(x = hidden_states, y = const_1_promoted_to_fp16)[name = string("op_813_cast_fp16")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5_cast_fp16 = concat(axis = var_811, interleave = input_5_interleave_0, values = (hidden_states, var_813_cast_fp16))[name = string("input_5_cast_fp16")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_808_to_fp16 = const()[name = string("op_808_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_808_to_fp16, x = 
input_5_cast_fp16)[name = string("normed_5_cast_fp16")]; + tensor var_818_split_sizes_0 = const()[name = string("op_818_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_818_axis_0 = const()[name = string("op_818_axis_0"), val = int32(-1)]; + tensor var_818_cast_fp16_0, tensor var_818_cast_fp16_1 = split(axis = var_818_axis_0, split_sizes = var_818_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_818_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545024768)))]; + tensor h_1_cast_fp16 = mul(x = var_818_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_824 = const()[name = string("op_824"), val = tensor([0, 2, 1])]; + tensor var_827_axes_0 = const()[name = string("op_827_axes_0"), val = tensor([2])]; + tensor var_825_cast_fp16 = transpose(perm = var_824, x = h_1_cast_fp16)[name = string("transpose_215")]; + tensor var_827_cast_fp16 = expand_dims(axes = var_827_axes_0, x = var_825_cast_fp16)[name = string("op_827_cast_fp16")]; + string var_843_pad_type_0 = const()[name = string("op_843_pad_type_0"), val = string("valid")]; + tensor var_843_strides_0 = const()[name = string("op_843_strides_0"), val = tensor([1, 1])]; + tensor var_843_pad_0 = const()[name = string("op_843_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_843_dilations_0 = const()[name = string("op_843_dilations_0"), val = tensor([1, 1])]; + int32 var_843_groups_0 = const()[name = string("op_843_groups_0"), val = int32(1)]; + tensor var_843 = conv(dilations = var_843_dilations_0, groups = var_843_groups_0, pad = var_843_pad_0, pad_type = var_843_pad_type_0, strides = var_843_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_827_cast_fp16)[name = string("op_843")]; + tensor var_848 = const()[name = string("op_848"), 
val = tensor([1, 8, 256, 1])]; + tensor var_849 = reshape(shape = var_848, x = var_843)[name = string("op_849")]; + tensor var_854 = const()[name = string("op_854"), val = tensor([0, 1, 3, 2])]; + tensor var_864 = const()[name = string("op_864"), val = tensor([1, 8, 256])]; + tensor var_855 = transpose(perm = var_854, x = var_849)[name = string("transpose_214")]; + tensor x_1 = reshape(shape = var_864, x = var_855)[name = string("x_1")]; + int32 var_870 = const()[name = string("op_870"), val = int32(-1)]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_872 = mul(x = x_1, y = const_2_promoted)[name = string("op_872")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9 = concat(axis = var_870, interleave = input_9_interleave_0, values = (x_1, var_872))[name = string("input_9")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_867_to_fp16 = const()[name = string("op_867_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_867_to_fp16, x = input_9)[name = string("normed_9_cast_fp16")]; + tensor var_877_split_sizes_0 = const()[name = string("op_877_split_sizes_0"), val = tensor([256, 256])]; + int32 var_877_axis_0 = const()[name = string("op_877_axis_0"), val = int32(-1)]; + tensor var_877_0, tensor var_877_1 = split(axis = var_877_axis_0, split_sizes = var_877_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_877")]; + tensor var_879 = mul(x = var_877_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_879")]; + tensor var_884 = const()[name = string("op_884"), val = tensor([1, 8, 1, 256])]; + tensor q_3 = reshape(shape = var_884, x = var_879)[name = string("q_3")]; + tensor var_886_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_886_cast_fp16")]; + tensor var_887_split_sizes_0 = const()[name = 
string("op_887_split_sizes_0"), val = tensor([128, 128])]; + int32 var_887_axis_0 = const()[name = string("op_887_axis_0"), val = int32(-1)]; + tensor var_887_0, tensor var_887_1 = split(axis = var_887_axis_0, split_sizes = var_887_split_sizes_0, x = q_3)[name = string("op_887")]; + fp16 const_3_promoted = const()[name = string("const_3_promoted"), val = fp16(-0x1p+0)]; + tensor var_889 = mul(x = var_887_1, y = const_3_promoted)[name = string("op_889")]; + int32 var_891 = const()[name = string("op_891"), val = int32(-1)]; + bool var_892_interleave_0 = const()[name = string("op_892_interleave_0"), val = bool(false)]; + tensor var_892 = concat(axis = var_891, interleave = var_892_interleave_0, values = (var_889, var_887_0))[name = string("op_892")]; + tensor var_893_cast_fp16 = mul(x = var_892, y = sin_s)[name = string("op_893_cast_fp16")]; + tensor q_7_cast_fp16 = add(x = var_886_cast_fp16, y = var_893_cast_fp16)[name = string("q_7_cast_fp16")]; + string var_906_pad_type_0 = const()[name = string("op_906_pad_type_0"), val = string("valid")]; + tensor var_906_strides_0 = const()[name = string("op_906_strides_0"), val = tensor([1, 1])]; + tensor var_906_pad_0 = const()[name = string("op_906_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_906_dilations_0 = const()[name = string("op_906_dilations_0"), val = tensor([1, 1])]; + int32 var_906_groups_0 = const()[name = string("op_906_groups_0"), val = int32(1)]; + tensor var_906 = conv(dilations = var_906_dilations_0, groups = var_906_groups_0, pad = var_906_pad_0, pad_type = var_906_pad_type_0, strides = var_906_strides_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = var_827_cast_fp16)[name = string("op_906")]; + tensor var_911 = const()[name = string("op_911"), val = tensor([1, 2, 256, 1])]; + tensor var_912 = reshape(shape = var_911, x = var_906)[name = string("op_912")]; + tensor var_917 = const()[name = string("op_917"), val = tensor([0, 1, 3, 2])]; + string var_934_pad_type_0 = const()[name = 
string("op_934_pad_type_0"), val = string("valid")]; + tensor var_934_strides_0 = const()[name = string("op_934_strides_0"), val = tensor([1, 1])]; + tensor var_934_pad_0 = const()[name = string("op_934_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_934_dilations_0 = const()[name = string("op_934_dilations_0"), val = tensor([1, 1])]; + int32 var_934_groups_0 = const()[name = string("op_934_groups_0"), val = int32(1)]; + tensor var_934 = conv(dilations = var_934_dilations_0, groups = var_934_groups_0, pad = var_934_pad_0, pad_type = var_934_pad_type_0, strides = var_934_strides_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = var_827_cast_fp16)[name = string("op_934")]; + tensor var_939 = const()[name = string("op_939"), val = tensor([1, 2, 256, 1])]; + tensor var_940 = reshape(shape = var_939, x = var_934)[name = string("op_940")]; + tensor var_945 = const()[name = string("op_945"), val = tensor([0, 1, 3, 2])]; + tensor var_955 = const()[name = string("op_955"), val = tensor([1, 2, 256])]; + tensor var_918 = transpose(perm = var_917, x = var_912)[name = string("transpose_213")]; + tensor x_3 = reshape(shape = var_955, x = var_918)[name = string("x_3")]; + int32 var_961 = const()[name = string("op_961"), val = int32(-1)]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_963 = mul(x = x_3, y = const_4_promoted)[name = string("op_963")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11 = concat(axis = var_961, interleave = input_11_interleave_0, values = (x_3, var_963))[name = string("input_11")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_958_to_fp16 = const()[name = string("op_958_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_958_to_fp16, x = input_11)[name = string("normed_13_cast_fp16")]; + tensor 
var_968_split_sizes_0 = const()[name = string("op_968_split_sizes_0"), val = tensor([256, 256])]; + int32 var_968_axis_0 = const()[name = string("op_968_axis_0"), val = int32(-1)]; + tensor var_968_0, tensor var_968_1 = split(axis = var_968_axis_0, split_sizes = var_968_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_968")]; + tensor var_970 = mul(x = var_968_0, y = layers_0_self_attn_k_norm_weight)[name = string("op_970")]; + tensor var_975 = const()[name = string("op_975"), val = tensor([1, 2, 1, 256])]; + tensor q_5 = reshape(shape = var_975, x = var_970)[name = string("q_5")]; + fp16 var_977_promoted = const()[name = string("op_977_promoted"), val = fp16(0x1p+1)]; + tensor var_946 = transpose(perm = var_945, x = var_940)[name = string("transpose_212")]; + tensor var_978 = pow(x = var_946, y = var_977_promoted)[name = string("op_978")]; + tensor var_983_axes_0 = const()[name = string("op_983_axes_0"), val = tensor([-1])]; + bool var_983_keep_dims_0 = const()[name = string("op_983_keep_dims_0"), val = bool(true)]; + tensor var_983 = reduce_mean(axes = var_983_axes_0, keep_dims = var_983_keep_dims_0, x = var_978)[name = string("op_983")]; + fp16 var_985_to_fp16 = const()[name = string("op_985_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_1_cast_fp16 = add(x = var_983, y = var_985_to_fp16)[name = string("mean_sq_1_cast_fp16")]; + fp32 var_987_epsilon_0 = const()[name = string("op_987_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_987_cast_fp16 = rsqrt(epsilon = var_987_epsilon_0, x = mean_sq_1_cast_fp16)[name = string("op_987_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_946, y = var_987_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_989_cast_fp16 = mul(x = q_5, y = cos_s)[name = string("op_989_cast_fp16")]; + tensor var_990_split_sizes_0 = const()[name = string("op_990_split_sizes_0"), val = tensor([128, 128])]; + int32 var_990_axis_0 = const()[name = string("op_990_axis_0"), val = int32(-1)]; + tensor var_990_0, 
tensor var_990_1 = split(axis = var_990_axis_0, split_sizes = var_990_split_sizes_0, x = q_5)[name = string("op_990")]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor var_992 = mul(x = var_990_1, y = const_5_promoted)[name = string("op_992")]; + int32 var_994 = const()[name = string("op_994"), val = int32(-1)]; + bool var_995_interleave_0 = const()[name = string("op_995_interleave_0"), val = bool(false)]; + tensor var_995 = concat(axis = var_994, interleave = var_995_interleave_0, values = (var_992, var_990_0))[name = string("op_995")]; + tensor var_996_cast_fp16 = mul(x = var_995, y = sin_s)[name = string("op_996_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = var_989_cast_fp16, y = var_996_cast_fp16)[name = string("input_13_cast_fp16")]; + tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; + fp16 const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_1_cast_fp16 = pad(constant_val = const_6_to_fp16, mode = k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_13_cast_fp16)[name = string("k_padded_1_cast_fp16")]; + tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; + fp16 const_7_to_fp16 = const()[name = string("const_7_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_1_cast_fp16 = pad(constant_val = const_7_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = input_15_cast_fp16)[name = string("v_padded_1_cast_fp16")]; + tensor var_1025_begin_0 = const()[name = string("op_1025_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1025_end_0 = const()[name = string("op_1025_end_0"), val = tensor([1, 2, 512, 512])]; + tensor 
var_1025_end_mask_0 = const()[name = string("op_1025_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1025_cast_fp16 = slice_by_index(begin = var_1025_begin_0, end = var_1025_end_0, end_mask = var_1025_end_mask_0, x = K_sliding_slot_1_cast_fp16)[name = string("op_1025_cast_fp16")]; + int32 var_1032 = const()[name = string("op_1032"), val = int32(2)]; + bool K_sliding_out_1_interleave_0 = const()[name = string("K_sliding_out_1_interleave_0"), val = bool(false)]; + tensor K_sliding_out_1_cast_fp16 = concat(axis = var_1032, interleave = K_sliding_out_1_interleave_0, values = (var_1025_cast_fp16, k_padded_1_cast_fp16))[name = string("K_sliding_out_1_cast_fp16")]; + tensor var_1048_begin_0 = const()[name = string("op_1048_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1048_end_0 = const()[name = string("op_1048_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1048_end_mask_0 = const()[name = string("op_1048_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1048_cast_fp16 = slice_by_index(begin = var_1048_begin_0, end = var_1048_end_0, end_mask = var_1048_end_mask_0, x = V_sliding_slot_1_cast_fp16)[name = string("op_1048_cast_fp16")]; + int32 var_1055 = const()[name = string("op_1055"), val = int32(2)]; + bool V_sliding_out_1_interleave_0 = const()[name = string("V_sliding_out_1_interleave_0"), val = bool(false)]; + tensor V_sliding_out_1_cast_fp16 = concat(axis = var_1055, interleave = V_sliding_out_1_interleave_0, values = (var_1048_cast_fp16, v_padded_1_cast_fp16))[name = string("V_sliding_out_1_cast_fp16")]; + tensor K_for_attn_1_begin_0 = const()[name = string("K_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_1_end_0 = const()[name = string("K_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_1_end_mask_0 = const()[name = string("K_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_1_cast_fp16 = slice_by_index(begin = 
K_for_attn_1_begin_0, end = K_for_attn_1_end_0, end_mask = K_for_attn_1_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("K_for_attn_1_cast_fp16")]; + tensor V_for_attn_1_begin_0 = const()[name = string("V_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_1_end_0 = const()[name = string("V_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_1_end_mask_0 = const()[name = string("V_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_1_cast_fp16 = slice_by_index(begin = V_for_attn_1_begin_0, end = V_for_attn_1_end_0, end_mask = V_for_attn_1_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("V_for_attn_1_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_for_attn_1_cast_fp16)[name = string("transpose_211")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_210")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), 
val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_for_attn_1_cast_fp16)[name = string("transpose_209")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_208")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_48_cast_fp16 = transpose(perm = transpose_48_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_207")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_7_cast_fp16, y = transpose_48_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_7_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = 
string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_7_cast_fp16)[name = string("reduce_max_0")]; + tensor var_1096 = sub(x = x_7_cast_fp16, y = reduce_max_0)[name = string("op_1096")]; + tensor var_1102 = exp(x = var_1096)[name = string("op_1102")]; + tensor var_1112_axes_0 = const()[name = string("op_1112_axes_0"), val = tensor([-1])]; + bool var_1112_keep_dims_0 = const()[name = string("op_1112_keep_dims_0"), val = bool(true)]; + tensor var_1112 = reduce_sum(axes = var_1112_axes_0, keep_dims = var_1112_keep_dims_0, x = var_1102)[name = string("op_1112")]; + tensor var_1118_cast_fp16 = real_div(x = var_1102, y = var_1112)[name = string("op_1118_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_206")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_1118_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([0, 2, 1, 3])]; + tensor var_1136 = const()[name = string("op_1136"), val = tensor([1, 1, -1])]; + tensor var_1130_cast_fp16 = transpose(perm = var_1129, x = attn_output_1_cast_fp16)[name = string("transpose_205")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_1136, x = var_1130_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_1141 = const()[name = string("op_1141"), val = tensor([0, 2, 1])]; + string var_1157_pad_type_0 = const()[name = string("op_1157_pad_type_0"), val = string("valid")]; + int32 var_1157_groups_0 = const()[name = 
string("op_1157_groups_0"), val = int32(1)]; + tensor var_1157_strides_0 = const()[name = string("op_1157_strides_0"), val = tensor([1])]; + tensor var_1157_pad_0 = const()[name = string("op_1157_pad_0"), val = tensor([0, 0])]; + tensor var_1157_dilations_0 = const()[name = string("op_1157_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545029952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547651456))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1142_cast_fp16 = transpose(perm = var_1141, x = attn_output_3_cast_fp16)[name = string("transpose_204")]; + tensor var_1157_cast_fp16 = conv(dilations = var_1157_dilations_0, groups = var_1157_groups_0, pad = var_1157_pad_0, pad_type = var_1157_pad_type_0, strides = var_1157_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1142_cast_fp16)[name = string("op_1157_cast_fp16")]; + tensor var_1161 = const()[name = string("op_1161"), val = tensor([0, 2, 1])]; + int32 var_1167 = const()[name = string("op_1167"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_11_cast_fp16 = transpose(perm = var_1161, x = var_1157_cast_fp16)[name = string("transpose_203")]; + tensor var_1169_cast_fp16 = mul(x = x_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_1169_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_1167, interleave = input_19_interleave_0, values = (x_11_cast_fp16, var_1169_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_1164_to_fp16 = const()[name = 
string("op_1164_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1164_to_fp16, x = input_19_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor var_1174_split_sizes_0 = const()[name = string("op_1174_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1174_axis_0 = const()[name = string("op_1174_axis_0"), val = int32(-1)]; + tensor var_1174_cast_fp16_0, tensor var_1174_cast_fp16_1 = split(axis = var_1174_axis_0, split_sizes = var_1174_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1174_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547654080)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_1174_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_13_cast_fp16")]; + int32 var_1183 = const()[name = string("op_1183"), val = int32(-1)]; + fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1185_cast_fp16 = mul(x = x_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_1185_cast_fp16")]; + bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; + tensor input_21_cast_fp16 = concat(axis = var_1183, interleave = input_21_interleave_0, values = (x_13_cast_fp16, var_1185_cast_fp16))[name = string("input_21_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_1180_to_fp16, x = 
input_21_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_1190_split_sizes_0 = const()[name = string("op_1190_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1190_axis_0 = const()[name = string("op_1190_axis_0"), val = int32(-1)]; + tensor var_1190_cast_fp16_0, tensor var_1190_cast_fp16_1 = split(axis = var_1190_axis_0, split_sizes = var_1190_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_1190_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547659264)))]; + tensor h_3_cast_fp16 = mul(x = var_1190_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_1201 = const()[name = string("op_1201"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_1202 = transpose(perm = var_1201, x = h_3_cast_fp16)[name = string("transpose_202")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_1202)[name = string("input_23")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_23)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = 
string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_25 = mul(x = gate_3, y = up_1)[name = string("input_25")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_25)[name = string("mlp_out_1")]; + tensor var_1242_axes_0 = const()[name = string("op_1242_axes_0"), val = tensor([2])]; + tensor var_1242 = squeeze(axes = var_1242_axes_0, x = mlp_out_1)[name = string("op_1242")]; + tensor var_1246 = const()[name = string("op_1246"), val = tensor([0, 2, 1])]; + int32 var_1252 = const()[name = string("op_1252"), val = int32(-1)]; + 
fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor x_15 = transpose(perm = var_1246, x = var_1242)[name = string("transpose_201")]; + tensor var_1254 = mul(x = x_15, y = const_10_promoted)[name = string("op_1254")]; + bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; + tensor input_27 = concat(axis = var_1252, interleave = input_27_interleave_0, values = (x_15, var_1254))[name = string("input_27")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_1249_to_fp16 = const()[name = string("op_1249_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1249_to_fp16, x = input_27)[name = string("normed_25_cast_fp16")]; + tensor var_1259_split_sizes_0 = const()[name = string("op_1259_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1259_axis_0 = const()[name = string("op_1259_axis_0"), val = int32(-1)]; + tensor var_1259_0, tensor var_1259_1 = split(axis = var_1259_axis_0, split_sizes = var_1259_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1259")]; + tensor hidden_states_3 = mul(x = var_1259_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_13_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 0])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 256])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined_out)[name = 
string("per_layer_slice_1_cast_fp16")]; + tensor var_1287 = const()[name = string("op_1287"), val = tensor([0, 2, 1])]; + tensor input_29_axes_0 = const()[name = string("input_29_axes_0"), val = tensor([2])]; + tensor var_1288 = transpose(perm = var_1287, x = hidden_states_5_cast_fp16)[name = string("transpose_200")]; + tensor input_29 = expand_dims(axes = input_29_axes_0, x = var_1288)[name = string("input_29")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_29)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_1307 = const()[name = string("op_1307"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_1308_cast_fp16 = transpose(perm = var_1307, x = per_layer_slice_1_cast_fp16)[name = string("transpose_199")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_1308_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_31_cast_fp16")]; + string gated_5_pad_type_0 = 
const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547664448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547992192))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_31_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_1324_axes_0 = const()[name = string("op_1324_axes_0"), val = tensor([2])]; + tensor var_1324_cast_fp16 = squeeze(axes = var_1324_axes_0, x = gated_5_cast_fp16)[name = string("op_1324_cast_fp16")]; + tensor var_1328 = const()[name = string("op_1328"), val = tensor([0, 2, 1])]; + int32 var_1334 = const()[name = string("op_1334"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_17_cast_fp16 = transpose(perm = var_1328, x = var_1324_cast_fp16)[name = string("transpose_198")]; + tensor var_1336_cast_fp16 = mul(x = x_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1336_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = 
var_1334, interleave = input_33_interleave_0, values = (x_17_cast_fp16, var_1336_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_1331_to_fp16 = const()[name = string("op_1331_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1331_to_fp16, x = input_33_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor var_1341_split_sizes_0 = const()[name = string("op_1341_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1341_axis_0 = const()[name = string("op_1341_axis_0"), val = int32(-1)]; + tensor var_1341_cast_fp16_0, tensor var_1341_cast_fp16_1 = split(axis = var_1341_axis_0, split_sizes = var_1341_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1341_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547994816)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_1341_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = tensor([0x1.f4p-5])]; + tensor x_19_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_12_promoted_to_fp16)[name = string("x_19_cast_fp16")]; + tensor var_1353_axes_0 = const()[name = string("op_1353_axes_0"), val = tensor([0])]; + tensor var_1353_cast_fp16 = squeeze(axes = var_1353_axes_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1353_cast_fp16")]; + tensor var_1355_axes_0 = const()[name = string("op_1355_axes_0"), val = tensor([0])]; + 
tensor var_1355_cast_fp16 = squeeze(axes = var_1355_axes_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1355_cast_fp16")]; + tensor var_1358_begin_0 = const()[name = string("op_1358_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1358_end_0 = const()[name = string("op_1358_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1358_end_mask_0 = const()[name = string("op_1358_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1358_squeeze_mask_0 = const()[name = string("op_1358_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1358_cast_fp16 = slice_by_index(begin = var_1358_begin_0, end = var_1358_end_0, end_mask = var_1358_end_mask_0, squeeze_mask = var_1358_squeeze_mask_0, x = K_sliding_in)[name = string("op_1358_cast_fp16")]; + tensor K_sliding_slot_3_axes_0 = const()[name = string("K_sliding_slot_3_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_3_cast_fp16 = expand_dims(axes = K_sliding_slot_3_axes_0, x = var_1358_cast_fp16)[name = string("K_sliding_slot_3_cast_fp16")]; + tensor var_1363_begin_0 = const()[name = string("op_1363_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1363_end_0 = const()[name = string("op_1363_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1363_end_mask_0 = const()[name = string("op_1363_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1363_squeeze_mask_0 = const()[name = string("op_1363_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1363_cast_fp16 = slice_by_index(begin = var_1363_begin_0, end = var_1363_end_0, end_mask = var_1363_end_mask_0, squeeze_mask = var_1363_squeeze_mask_0, x = V_sliding_in)[name = string("op_1363_cast_fp16")]; + tensor V_sliding_slot_3_axes_0 = const()[name = string("V_sliding_slot_3_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_3_cast_fp16 = expand_dims(axes = V_sliding_slot_3_axes_0, x = var_1363_cast_fp16)[name = string("V_sliding_slot_3_cast_fp16")]; + int32 var_1370 = 
const()[name = string("op_1370"), val = int32(-1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1372_cast_fp16 = mul(x = x_19_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1372_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1370, interleave = input_35_interleave_0, values = (x_19_cast_fp16, var_1372_cast_fp16))[name = string("input_35_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1367_to_fp16 = const()[name = string("op_1367_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1367_to_fp16, x = input_35_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1377_split_sizes_0 = const()[name = string("op_1377_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1377_axis_0 = const()[name = string("op_1377_axis_0"), val = int32(-1)]; + tensor var_1377_cast_fp16_0, tensor var_1377_cast_fp16_1 = split(axis = var_1377_axis_0, split_sizes = var_1377_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1377_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548000000)))]; + tensor h_7_cast_fp16 = mul(x = var_1377_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_1383 = const()[name = string("op_1383"), val = tensor([0, 2, 1])]; + tensor var_1386_axes_0 = const()[name = string("op_1386_axes_0"), val = tensor([2])]; + tensor var_1384_cast_fp16 = transpose(perm = var_1383, x = h_7_cast_fp16)[name = string("transpose_197")]; + tensor var_1386_cast_fp16 = expand_dims(axes = 
var_1386_axes_0, x = var_1384_cast_fp16)[name = string("op_1386_cast_fp16")]; + string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")]; + tensor var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor([1, 1])]; + tensor var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor([1, 1])]; + int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)]; + tensor var_1402 = conv(dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_1386_cast_fp16)[name = string("op_1402")]; + tensor var_1407 = const()[name = string("op_1407"), val = tensor([1, 8, 256, 1])]; + tensor var_1408 = reshape(shape = var_1407, x = var_1402)[name = string("op_1408")]; + tensor var_1413 = const()[name = string("op_1413"), val = tensor([0, 1, 3, 2])]; + tensor var_1423 = const()[name = string("op_1423"), val = tensor([1, 8, 256])]; + tensor var_1414 = transpose(perm = var_1413, x = var_1408)[name = string("transpose_196")]; + tensor x_21 = reshape(shape = var_1423, x = var_1414)[name = string("x_21")]; + int32 var_1429 = const()[name = string("op_1429"), val = int32(-1)]; + fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; + tensor var_1431 = mul(x = x_21, y = const_14_promoted)[name = string("op_1431")]; + bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; + tensor input_39 = concat(axis = var_1429, interleave = input_39_interleave_0, values = (x_21, var_1431))[name = string("input_39")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1426_to_fp16 = const()[name = string("op_1426_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1426_to_fp16, x = input_39)[name = string("normed_37_cast_fp16")]; + tensor var_1436_split_sizes_0 = const()[name = string("op_1436_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1436_axis_0 = const()[name = string("op_1436_axis_0"), val = int32(-1)]; + tensor var_1436_0, tensor var_1436_1 = split(axis = var_1436_axis_0, split_sizes = var_1436_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1436")]; + tensor var_1438 = mul(x = var_1436_0, y = layers_1_self_attn_q_norm_weight)[name = string("op_1438")]; + tensor var_1443 = const()[name = string("op_1443"), val = tensor([1, 8, 1, 256])]; + tensor q_11 = reshape(shape = var_1443, x = var_1438)[name = string("q_11")]; + tensor var_1445_cast_fp16 = mul(x = q_11, y = cos_s)[name = string("op_1445_cast_fp16")]; + tensor var_1446_split_sizes_0 = const()[name = string("op_1446_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1446_axis_0 = const()[name = string("op_1446_axis_0"), val = int32(-1)]; + tensor var_1446_0, tensor var_1446_1 = split(axis = var_1446_axis_0, split_sizes = var_1446_split_sizes_0, x = q_11)[name = string("op_1446")]; + fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; + tensor var_1448 = mul(x = var_1446_1, y = const_15_promoted)[name = string("op_1448")]; + int32 var_1450 = const()[name = string("op_1450"), val = int32(-1)]; + bool var_1451_interleave_0 = const()[name = string("op_1451_interleave_0"), val = bool(false)]; + tensor var_1451 = concat(axis = var_1450, interleave = var_1451_interleave_0, values = (var_1448, var_1446_0))[name = string("op_1451")]; + tensor var_1452_cast_fp16 = mul(x = var_1451, y = sin_s)[name = string("op_1452_cast_fp16")]; + tensor q_15_cast_fp16 = add(x = var_1445_cast_fp16, y = var_1452_cast_fp16)[name = string("q_15_cast_fp16")]; + string var_1465_pad_type_0 = const()[name = string("op_1465_pad_type_0"), 
val = string("valid")]; + tensor var_1465_strides_0 = const()[name = string("op_1465_strides_0"), val = tensor([1, 1])]; + tensor var_1465_pad_0 = const()[name = string("op_1465_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1465_dilations_0 = const()[name = string("op_1465_dilations_0"), val = tensor([1, 1])]; + int32 var_1465_groups_0 = const()[name = string("op_1465_groups_0"), val = int32(1)]; + tensor var_1465 = conv(dilations = var_1465_dilations_0, groups = var_1465_groups_0, pad = var_1465_pad_0, pad_type = var_1465_pad_type_0, strides = var_1465_strides_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = var_1386_cast_fp16)[name = string("op_1465")]; + tensor var_1470 = const()[name = string("op_1470"), val = tensor([1, 2, 256, 1])]; + tensor var_1471 = reshape(shape = var_1470, x = var_1465)[name = string("op_1471")]; + tensor var_1476 = const()[name = string("op_1476"), val = tensor([0, 1, 3, 2])]; + string var_1493_pad_type_0 = const()[name = string("op_1493_pad_type_0"), val = string("valid")]; + tensor var_1493_strides_0 = const()[name = string("op_1493_strides_0"), val = tensor([1, 1])]; + tensor var_1493_pad_0 = const()[name = string("op_1493_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1493_dilations_0 = const()[name = string("op_1493_dilations_0"), val = tensor([1, 1])]; + int32 var_1493_groups_0 = const()[name = string("op_1493_groups_0"), val = int32(1)]; + tensor var_1493 = conv(dilations = var_1493_dilations_0, groups = var_1493_groups_0, pad = var_1493_pad_0, pad_type = var_1493_pad_type_0, strides = var_1493_strides_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = var_1386_cast_fp16)[name = string("op_1493")]; + tensor var_1498 = const()[name = string("op_1498"), val = tensor([1, 2, 256, 1])]; + tensor var_1499 = reshape(shape = var_1498, x = var_1493)[name = string("op_1499")]; + tensor var_1504 = const()[name = string("op_1504"), val = tensor([0, 1, 3, 2])]; + tensor var_1514 = const()[name = 
string("op_1514"), val = tensor([1, 2, 256])]; + tensor var_1477 = transpose(perm = var_1476, x = var_1471)[name = string("transpose_195")]; + tensor x_23 = reshape(shape = var_1514, x = var_1477)[name = string("x_23")]; + int32 var_1520 = const()[name = string("op_1520"), val = int32(-1)]; + fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; + tensor var_1522 = mul(x = x_23, y = const_16_promoted)[name = string("op_1522")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1520, interleave = input_41_interleave_0, values = (x_23, var_1522))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1517_to_fp16 = const()[name = string("op_1517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1517_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1527_split_sizes_0 = const()[name = string("op_1527_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1527_axis_0 = const()[name = string("op_1527_axis_0"), val = int32(-1)]; + tensor var_1527_0, tensor var_1527_1 = split(axis = var_1527_axis_0, split_sizes = var_1527_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1527")]; + tensor var_1529 = mul(x = var_1527_0, y = layers_1_self_attn_k_norm_weight)[name = string("op_1529")]; + tensor var_1534 = const()[name = string("op_1534"), val = tensor([1, 2, 1, 256])]; + tensor q_13 = reshape(shape = var_1534, x = var_1529)[name = string("q_13")]; + fp16 var_1536_promoted = const()[name = string("op_1536_promoted"), val = fp16(0x1p+1)]; + tensor var_1505 = transpose(perm = var_1504, x = var_1499)[name = string("transpose_194")]; + tensor var_1537 = pow(x = var_1505, y = var_1536_promoted)[name = string("op_1537")]; + tensor var_1542_axes_0 = const()[name = 
string("op_1542_axes_0"), val = tensor([-1])]; + bool var_1542_keep_dims_0 = const()[name = string("op_1542_keep_dims_0"), val = bool(true)]; + tensor var_1542 = reduce_mean(axes = var_1542_axes_0, keep_dims = var_1542_keep_dims_0, x = var_1537)[name = string("op_1542")]; + fp16 var_1544_to_fp16 = const()[name = string("op_1544_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_3_cast_fp16 = add(x = var_1542, y = var_1544_to_fp16)[name = string("mean_sq_3_cast_fp16")]; + fp32 var_1546_epsilon_0 = const()[name = string("op_1546_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1546_cast_fp16 = rsqrt(epsilon = var_1546_epsilon_0, x = mean_sq_3_cast_fp16)[name = string("op_1546_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = var_1505, y = var_1546_cast_fp16)[name = string("input_45_cast_fp16")]; + tensor var_1548_cast_fp16 = mul(x = q_13, y = cos_s)[name = string("op_1548_cast_fp16")]; + tensor var_1549_split_sizes_0 = const()[name = string("op_1549_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1549_axis_0 = const()[name = string("op_1549_axis_0"), val = int32(-1)]; + tensor var_1549_0, tensor var_1549_1 = split(axis = var_1549_axis_0, split_sizes = var_1549_split_sizes_0, x = q_13)[name = string("op_1549")]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1551 = mul(x = var_1549_1, y = const_17_promoted)[name = string("op_1551")]; + int32 var_1553 = const()[name = string("op_1553"), val = int32(-1)]; + bool var_1554_interleave_0 = const()[name = string("op_1554_interleave_0"), val = bool(false)]; + tensor var_1554 = concat(axis = var_1553, interleave = var_1554_interleave_0, values = (var_1551, var_1549_0))[name = string("op_1554")]; + tensor var_1555_cast_fp16 = mul(x = var_1554, y = sin_s)[name = string("op_1555_cast_fp16")]; + tensor input_43_cast_fp16 = add(x = var_1548_cast_fp16, y = var_1555_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor k_padded_3_pad_0 = const()[name = 
string("k_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; + fp16 const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_3_cast_fp16 = pad(constant_val = const_18_to_fp16, mode = k_padded_3_mode_0, pad = k_padded_3_pad_0, x = input_43_cast_fp16)[name = string("k_padded_3_cast_fp16")]; + tensor v_padded_3_pad_0 = const()[name = string("v_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; + fp16 const_19_to_fp16 = const()[name = string("const_19_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_3_cast_fp16 = pad(constant_val = const_19_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = input_45_cast_fp16)[name = string("v_padded_3_cast_fp16")]; + tensor var_1584_begin_0 = const()[name = string("op_1584_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1584_end_0 = const()[name = string("op_1584_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1584_end_mask_0 = const()[name = string("op_1584_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1584_cast_fp16 = slice_by_index(begin = var_1584_begin_0, end = var_1584_end_0, end_mask = var_1584_end_mask_0, x = K_sliding_slot_3_cast_fp16)[name = string("op_1584_cast_fp16")]; + int32 var_1591 = const()[name = string("op_1591"), val = int32(2)]; + bool K_sliding_out_3_interleave_0 = const()[name = string("K_sliding_out_3_interleave_0"), val = bool(false)]; + tensor K_sliding_out_3_cast_fp16 = concat(axis = var_1591, interleave = K_sliding_out_3_interleave_0, values = (var_1584_cast_fp16, k_padded_3_cast_fp16))[name = string("K_sliding_out_3_cast_fp16")]; + tensor var_1607_begin_0 = const()[name = string("op_1607_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1607_end_0 = const()[name = string("op_1607_end_0"), val = 
tensor([1, 2, 512, 512])]; + tensor var_1607_end_mask_0 = const()[name = string("op_1607_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1607_cast_fp16 = slice_by_index(begin = var_1607_begin_0, end = var_1607_end_0, end_mask = var_1607_end_mask_0, x = V_sliding_slot_3_cast_fp16)[name = string("op_1607_cast_fp16")]; + int32 var_1614 = const()[name = string("op_1614"), val = int32(2)]; + bool V_sliding_out_3_interleave_0 = const()[name = string("V_sliding_out_3_interleave_0"), val = bool(false)]; + tensor V_sliding_out_3_cast_fp16 = concat(axis = var_1614, interleave = V_sliding_out_3_interleave_0, values = (var_1607_cast_fp16, v_padded_3_cast_fp16))[name = string("V_sliding_out_3_cast_fp16")]; + tensor K_for_attn_3_begin_0 = const()[name = string("K_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_3_end_0 = const()[name = string("K_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_3_end_mask_0 = const()[name = string("K_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_3_cast_fp16 = slice_by_index(begin = K_for_attn_3_begin_0, end = K_for_attn_3_end_0, end_mask = K_for_attn_3_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("K_for_attn_3_cast_fp16")]; + tensor V_for_attn_3_begin_0 = const()[name = string("V_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_3_end_0 = const()[name = string("V_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_3_end_mask_0 = const()[name = string("V_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_3_cast_fp16 = slice_by_index(begin = V_for_attn_3_begin_0, end = V_for_attn_3_end_0, end_mask = V_for_attn_3_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("V_for_attn_3_cast_fp16")]; + tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_2_reps_0 = const()[name = 
string("tile_2_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_for_attn_3_cast_fp16)[name = string("transpose_193")]; + tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_4, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; + tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_192")]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_5, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_for_attn_3_cast_fp16)[name = string("transpose_191")]; + tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_6, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = 
reshape_6_cast_fp16)[name = string("transpose_190")]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_7, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; + tensor V_expanded_3_perm_0 = const()[name = string("V_expanded_3_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_189")]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_15_cast_fp16, y = transpose_49_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_27_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_27_cast_fp16)[name = string("reduce_max_1")]; + tensor var_1655 = sub(x = x_27_cast_fp16, y = reduce_max_1)[name = string("op_1655")]; + tensor var_1661 = exp(x = var_1655)[name = string("op_1661")]; + tensor var_1671_axes_0 = const()[name = string("op_1671_axes_0"), val = tensor([-1])]; + bool var_1671_keep_dims_0 = const()[name = string("op_1671_keep_dims_0"), val = bool(true)]; + tensor var_1671 = reduce_sum(axes = var_1671_axes_0, keep_dims = var_1671_keep_dims_0, x = var_1661)[name = string("op_1671")]; + tensor var_1677_cast_fp16 = real_div(x = var_1661, y = var_1671)[name = string("op_1677_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = 
string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor V_expanded_3_cast_fp16 = transpose(perm = V_expanded_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_188")]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_1677_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_1688 = const()[name = string("op_1688"), val = tensor([0, 2, 1, 3])]; + tensor var_1695 = const()[name = string("op_1695"), val = tensor([1, 1, -1])]; + tensor var_1689_cast_fp16 = transpose(perm = var_1688, x = attn_output_7_cast_fp16)[name = string("transpose_187")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_1695, x = var_1689_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_1700 = const()[name = string("op_1700"), val = tensor([0, 2, 1])]; + string var_1716_pad_type_0 = const()[name = string("op_1716_pad_type_0"), val = string("valid")]; + int32 var_1716_groups_0 = const()[name = string("op_1716_groups_0"), val = int32(1)]; + tensor var_1716_strides_0 = const()[name = string("op_1716_strides_0"), val = tensor([1])]; + tensor var_1716_pad_0 = const()[name = string("op_1716_pad_0"), val = tensor([0, 0])]; + tensor var_1716_dilations_0 = const()[name = string("op_1716_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548005184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550626688))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1701_cast_fp16 = transpose(perm = var_1700, x = attn_output_9_cast_fp16)[name = string("transpose_186")]; + tensor var_1716_cast_fp16 = conv(dilations 
= var_1716_dilations_0, groups = var_1716_groups_0, pad = var_1716_pad_0, pad_type = var_1716_pad_type_0, strides = var_1716_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1701_cast_fp16)[name = string("op_1716_cast_fp16")]; + tensor var_1720 = const()[name = string("op_1720"), val = tensor([0, 2, 1])]; + int32 var_1726 = const()[name = string("op_1726"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_31_cast_fp16 = transpose(perm = var_1720, x = var_1716_cast_fp16)[name = string("transpose_185")]; + tensor var_1728_cast_fp16 = mul(x = x_31_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1728_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1726, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1728_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1723_to_fp16 = const()[name = string("op_1723_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1723_to_fp16, x = input_49_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1733_split_sizes_0 = const()[name = string("op_1733_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1733_axis_0 = const()[name = string("op_1733_axis_0"), val = int32(-1)]; + tensor var_1733_cast_fp16_0, tensor var_1733_cast_fp16_1 = split(axis = var_1733_axis_0, split_sizes = var_1733_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1733_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(550629312)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1733_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_19_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_33_cast_fp16")]; + int32 var_1742 = const()[name = string("op_1742"), val = int32(-1)]; + fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1744_cast_fp16 = mul(x = x_33_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1744_cast_fp16")]; + bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; + tensor input_51_cast_fp16 = concat(axis = var_1742, interleave = input_51_interleave_0, values = (x_33_cast_fp16, var_1744_cast_fp16))[name = string("input_51_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1739_to_fp16 = const()[name = string("op_1739_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1739_to_fp16, x = input_51_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1749_split_sizes_0 = const()[name = string("op_1749_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1749_axis_0 = const()[name = string("op_1749_axis_0"), val = int32(-1)]; + tensor var_1749_cast_fp16_0, tensor var_1749_cast_fp16_1 = split(axis = var_1749_axis_0, split_sizes = var_1749_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1749_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550634496)))]; + tensor h_9_cast_fp16 = mul(x = var_1749_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = 
string("h_9_cast_fp16")]; + tensor var_1760 = const()[name = string("op_1760"), val = tensor([0, 2, 1])]; + tensor input_53_axes_0 = const()[name = string("input_53_axes_0"), val = tensor([2])]; + tensor var_1761 = transpose(perm = var_1760, x = h_9_cast_fp16)[name = string("transpose_184")]; + tensor input_53 = expand_dims(axes = input_53_axes_0, x = var_1761)[name = string("input_53")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_53)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_53)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_55 = mul(x 
= gate_7, y = up_3)[name = string("input_55")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_55)[name = string("mlp_out_3")]; + tensor var_1801_axes_0 = const()[name = string("op_1801_axes_0"), val = tensor([2])]; + tensor var_1801 = squeeze(axes = var_1801_axes_0, x = mlp_out_3)[name = string("op_1801")]; + tensor var_1805 = const()[name = string("op_1805"), val = tensor([0, 2, 1])]; + int32 var_1811 = const()[name = string("op_1811"), val = int32(-1)]; + fp16 const_22_promoted = const()[name = string("const_22_promoted"), val = fp16(-0x1p+0)]; + tensor x_35 = transpose(perm = var_1805, x = var_1801)[name = string("transpose_183")]; + tensor var_1813 = mul(x = x_35, y = const_22_promoted)[name = string("op_1813")]; + bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57 = concat(axis = var_1811, interleave = input_57_interleave_0, values = (x_35, var_1813))[name = string("input_57")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1808_to_fp16 = const()[name = string("op_1808_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1808_to_fp16, x = input_57)[name = string("normed_53_cast_fp16")]; + tensor 
var_1818_split_sizes_0 = const()[name = string("op_1818_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1818_axis_0 = const()[name = string("op_1818_axis_0"), val = int32(-1)]; + tensor var_1818_0, tensor var_1818_1 = split(axis = var_1818_axis_0, split_sizes = var_1818_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1818")]; + tensor hidden_states_13 = mul(x = var_1818_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_33_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 256])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 512])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1846 = const()[name = string("op_1846"), val = tensor([0, 2, 1])]; + tensor input_59_axes_0 = const()[name = string("input_59_axes_0"), val = tensor([2])]; + tensor var_1847 = transpose(perm = var_1846, x = hidden_states_15_cast_fp16)[name = string("transpose_182")]; + tensor input_59 = expand_dims(axes = input_59_axes_0, x = var_1847)[name = string("input_59")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 
= const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_59)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1866 = const()[name = string("op_1866"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1867_cast_fp16 = transpose(perm = var_1866, x = per_layer_slice_3_cast_fp16)[name = string("transpose_181")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1867_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_61_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_61_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550639680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550967424))))[name = 
string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1883_axes_0 = const()[name = string("op_1883_axes_0"), val = tensor([2])]; + tensor var_1883_cast_fp16 = squeeze(axes = var_1883_axes_0, x = gated_11_cast_fp16)[name = string("op_1883_cast_fp16")]; + tensor var_1887 = const()[name = string("op_1887"), val = tensor([0, 2, 1])]; + int32 var_1893 = const()[name = string("op_1893"), val = int32(-1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_37_cast_fp16 = transpose(perm = var_1887, x = var_1883_cast_fp16)[name = string("transpose_180")]; + tensor var_1895_cast_fp16 = mul(x = x_37_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_1895_cast_fp16")]; + bool input_63_interleave_0 = const()[name = string("input_63_interleave_0"), val = bool(false)]; + tensor input_63_cast_fp16 = concat(axis = var_1893, interleave = input_63_interleave_0, values = (x_37_cast_fp16, var_1895_cast_fp16))[name = string("input_63_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1890_to_fp16 = const()[name = string("op_1890_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1890_to_fp16, x = input_63_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1900_split_sizes_0 = const()[name = string("op_1900_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1900_axis_0 = const()[name = string("op_1900_axis_0"), val = int32(-1)]; + tensor var_1900_cast_fp16_0, tensor var_1900_cast_fp16_1 = split(axis = 
var_1900_axis_0, split_sizes = var_1900_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1900_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550970048)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1900_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = tensor([0x1.48p-3])]; + tensor x_39_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_24_promoted_to_fp16)[name = string("x_39_cast_fp16")]; + tensor var_1912_axes_0 = const()[name = string("op_1912_axes_0"), val = tensor([0])]; + tensor var_1912_cast_fp16 = squeeze(axes = var_1912_axes_0, x = K_sliding_out_3_cast_fp16)[name = string("op_1912_cast_fp16")]; + tensor var_1914_axes_0 = const()[name = string("op_1914_axes_0"), val = tensor([0])]; + tensor var_1914_cast_fp16 = squeeze(axes = var_1914_axes_0, x = V_sliding_out_3_cast_fp16)[name = string("op_1914_cast_fp16")]; + tensor var_1917_begin_0 = const()[name = string("op_1917_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1917_end_0 = const()[name = string("op_1917_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_1917_end_mask_0 = const()[name = string("op_1917_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1917_squeeze_mask_0 = const()[name = string("op_1917_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1917_cast_fp16 = slice_by_index(begin = var_1917_begin_0, end = var_1917_end_0, end_mask = var_1917_end_mask_0, squeeze_mask = var_1917_squeeze_mask_0, x = 
K_sliding_in)[name = string("op_1917_cast_fp16")]; + tensor K_sliding_slot_5_axes_0 = const()[name = string("K_sliding_slot_5_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_5_cast_fp16 = expand_dims(axes = K_sliding_slot_5_axes_0, x = var_1917_cast_fp16)[name = string("K_sliding_slot_5_cast_fp16")]; + tensor var_1922_begin_0 = const()[name = string("op_1922_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1922_end_0 = const()[name = string("op_1922_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_1922_end_mask_0 = const()[name = string("op_1922_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1922_squeeze_mask_0 = const()[name = string("op_1922_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1922_cast_fp16 = slice_by_index(begin = var_1922_begin_0, end = var_1922_end_0, end_mask = var_1922_end_mask_0, squeeze_mask = var_1922_squeeze_mask_0, x = V_sliding_in)[name = string("op_1922_cast_fp16")]; + tensor V_sliding_slot_5_axes_0 = const()[name = string("V_sliding_slot_5_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_5_cast_fp16 = expand_dims(axes = V_sliding_slot_5_axes_0, x = var_1922_cast_fp16)[name = string("V_sliding_slot_5_cast_fp16")]; + int32 var_1929 = const()[name = string("op_1929"), val = int32(-1)]; + fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1931_cast_fp16 = mul(x = x_39_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_1931_cast_fp16")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65_cast_fp16 = concat(axis = var_1929, interleave = input_65_interleave_0, values = (x_39_cast_fp16, var_1931_cast_fp16))[name = string("input_65_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1926_to_fp16 = const()[name = string("op_1926_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1926_to_fp16, x = input_65_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_1936_split_sizes_0 = const()[name = string("op_1936_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1936_axis_0 = const()[name = string("op_1936_axis_0"), val = int32(-1)]; + tensor var_1936_cast_fp16_0, tensor var_1936_cast_fp16_1 = split(axis = var_1936_axis_0, split_sizes = var_1936_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1936_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550975232)))]; + tensor h_13_cast_fp16 = mul(x = var_1936_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1942 = const()[name = string("op_1942"), val = tensor([0, 2, 1])]; + tensor var_1945_axes_0 = const()[name = string("op_1945_axes_0"), val = tensor([2])]; + tensor var_1943_cast_fp16 = transpose(perm = var_1942, x = h_13_cast_fp16)[name = string("transpose_179")]; + tensor var_1945_cast_fp16 = expand_dims(axes = var_1945_axes_0, x = var_1943_cast_fp16)[name = string("op_1945_cast_fp16")]; + string var_1961_pad_type_0 = const()[name = string("op_1961_pad_type_0"), val = string("valid")]; + tensor var_1961_strides_0 = const()[name = string("op_1961_strides_0"), val = tensor([1, 1])]; + tensor var_1961_pad_0 = const()[name = string("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_dilations_0 = const()[name = string("op_1961_dilations_0"), val = tensor([1, 1])]; + int32 var_1961_groups_0 = const()[name = string("op_1961_groups_0"), val = int32(1)]; + tensor var_1961 = conv(dilations = var_1961_dilations_0, groups = var_1961_groups_0, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1961_strides_0, weight = 
layers_2_self_attn_q_proj_weight_palettized, x = var_1945_cast_fp16)[name = string("op_1961")]; + tensor var_1966 = const()[name = string("op_1966"), val = tensor([1, 8, 256, 1])]; + tensor var_1967 = reshape(shape = var_1966, x = var_1961)[name = string("op_1967")]; + tensor var_1972 = const()[name = string("op_1972"), val = tensor([0, 1, 3, 2])]; + tensor var_1982 = const()[name = string("op_1982"), val = tensor([1, 8, 256])]; + tensor var_1973 = transpose(perm = var_1972, x = var_1967)[name = string("transpose_178")]; + tensor x_41 = reshape(shape = var_1982, x = var_1973)[name = string("x_41")]; + int32 var_1988 = const()[name = string("op_1988"), val = int32(-1)]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_1990 = mul(x = x_41, y = const_26_promoted)[name = string("op_1990")]; + bool input_69_interleave_0 = const()[name = string("input_69_interleave_0"), val = bool(false)]; + tensor input_69 = concat(axis = var_1988, interleave = input_69_interleave_0, values = (x_41, var_1990))[name = string("input_69")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1985_to_fp16, x = input_69)[name = string("normed_65_cast_fp16")]; + tensor var_1995_split_sizes_0 = const()[name = string("op_1995_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1995_axis_0 = const()[name = string("op_1995_axis_0"), val = int32(-1)]; + tensor var_1995_0, tensor var_1995_1 = split(axis = var_1995_axis_0, split_sizes = var_1995_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1995")]; + tensor var_1997 = mul(x = var_1995_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_1997")]; + tensor var_2002 = const()[name = string("op_2002"), val = tensor([1, 8, 1, 256])]; + tensor q_19 = reshape(shape = 
var_2002, x = var_1997)[name = string("q_19")]; + tensor var_2004_cast_fp16 = mul(x = q_19, y = cos_s)[name = string("op_2004_cast_fp16")]; + tensor var_2005_split_sizes_0 = const()[name = string("op_2005_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2005_axis_0 = const()[name = string("op_2005_axis_0"), val = int32(-1)]; + tensor var_2005_0, tensor var_2005_1 = split(axis = var_2005_axis_0, split_sizes = var_2005_split_sizes_0, x = q_19)[name = string("op_2005")]; + fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; + tensor var_2007 = mul(x = var_2005_1, y = const_27_promoted)[name = string("op_2007")]; + int32 var_2009 = const()[name = string("op_2009"), val = int32(-1)]; + bool var_2010_interleave_0 = const()[name = string("op_2010_interleave_0"), val = bool(false)]; + tensor var_2010 = concat(axis = var_2009, interleave = var_2010_interleave_0, values = (var_2007, var_2005_0))[name = string("op_2010")]; + tensor var_2011_cast_fp16 = mul(x = var_2010, y = sin_s)[name = string("op_2011_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_2004_cast_fp16, y = var_2011_cast_fp16)[name = string("q_23_cast_fp16")]; + string var_2024_pad_type_0 = const()[name = string("op_2024_pad_type_0"), val = string("valid")]; + tensor var_2024_strides_0 = const()[name = string("op_2024_strides_0"), val = tensor([1, 1])]; + tensor var_2024_pad_0 = const()[name = string("op_2024_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2024_dilations_0 = const()[name = string("op_2024_dilations_0"), val = tensor([1, 1])]; + int32 var_2024_groups_0 = const()[name = string("op_2024_groups_0"), val = int32(1)]; + tensor var_2024 = conv(dilations = var_2024_dilations_0, groups = var_2024_groups_0, pad = var_2024_pad_0, pad_type = var_2024_pad_type_0, strides = var_2024_strides_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = var_1945_cast_fp16)[name = string("op_2024")]; + tensor var_2029 = const()[name = string("op_2029"), val = 
tensor([1, 2, 256, 1])]; + tensor var_2030 = reshape(shape = var_2029, x = var_2024)[name = string("op_2030")]; + tensor var_2035 = const()[name = string("op_2035"), val = tensor([0, 1, 3, 2])]; + string var_2052_pad_type_0 = const()[name = string("op_2052_pad_type_0"), val = string("valid")]; + tensor var_2052_strides_0 = const()[name = string("op_2052_strides_0"), val = tensor([1, 1])]; + tensor var_2052_pad_0 = const()[name = string("op_2052_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2052_dilations_0 = const()[name = string("op_2052_dilations_0"), val = tensor([1, 1])]; + int32 var_2052_groups_0 = const()[name = string("op_2052_groups_0"), val = int32(1)]; + tensor var_2052 = conv(dilations = var_2052_dilations_0, groups = var_2052_groups_0, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2052_strides_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = var_1945_cast_fp16)[name = string("op_2052")]; + tensor var_2057 = const()[name = string("op_2057"), val = tensor([1, 2, 256, 1])]; + tensor var_2058 = reshape(shape = var_2057, x = var_2052)[name = string("op_2058")]; + tensor var_2063 = const()[name = string("op_2063"), val = tensor([0, 1, 3, 2])]; + tensor var_2073 = const()[name = string("op_2073"), val = tensor([1, 2, 256])]; + tensor var_2036 = transpose(perm = var_2035, x = var_2030)[name = string("transpose_177")]; + tensor x_43 = reshape(shape = var_2073, x = var_2036)[name = string("x_43")]; + int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)]; + fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; + tensor var_2081 = mul(x = x_43, y = const_28_promoted)[name = string("op_2081")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71 = concat(axis = var_2079, interleave = input_71_interleave_0, values = (x_43, var_2081))[name = string("input_71")]; + tensor normed_69_axes_0 = const()[name = 
string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_2076_to_fp16 = const()[name = string("op_2076_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2076_to_fp16, x = input_71)[name = string("normed_69_cast_fp16")]; + tensor var_2086_split_sizes_0 = const()[name = string("op_2086_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2086_axis_0 = const()[name = string("op_2086_axis_0"), val = int32(-1)]; + tensor var_2086_0, tensor var_2086_1 = split(axis = var_2086_axis_0, split_sizes = var_2086_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2086")]; + tensor var_2088 = mul(x = var_2086_0, y = layers_2_self_attn_k_norm_weight)[name = string("op_2088")]; + tensor var_2093 = const()[name = string("op_2093"), val = tensor([1, 2, 1, 256])]; + tensor q_21 = reshape(shape = var_2093, x = var_2088)[name = string("q_21")]; + fp16 var_2095_promoted = const()[name = string("op_2095_promoted"), val = fp16(0x1p+1)]; + tensor var_2064 = transpose(perm = var_2063, x = var_2058)[name = string("transpose_176")]; + tensor var_2096 = pow(x = var_2064, y = var_2095_promoted)[name = string("op_2096")]; + tensor var_2101_axes_0 = const()[name = string("op_2101_axes_0"), val = tensor([-1])]; + bool var_2101_keep_dims_0 = const()[name = string("op_2101_keep_dims_0"), val = bool(true)]; + tensor var_2101 = reduce_mean(axes = var_2101_axes_0, keep_dims = var_2101_keep_dims_0, x = var_2096)[name = string("op_2101")]; + fp16 var_2103_to_fp16 = const()[name = string("op_2103_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_5_cast_fp16 = add(x = var_2101, y = var_2103_to_fp16)[name = string("mean_sq_5_cast_fp16")]; + fp32 var_2105_epsilon_0 = const()[name = string("op_2105_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2105_cast_fp16 = rsqrt(epsilon = var_2105_epsilon_0, x = mean_sq_5_cast_fp16)[name = string("op_2105_cast_fp16")]; + tensor input_75_cast_fp16 = mul(x = var_2064, y = 
var_2105_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor var_2107_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_2107_cast_fp16")]; + tensor var_2108_split_sizes_0 = const()[name = string("op_2108_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2108_axis_0 = const()[name = string("op_2108_axis_0"), val = int32(-1)]; + tensor var_2108_0, tensor var_2108_1 = split(axis = var_2108_axis_0, split_sizes = var_2108_split_sizes_0, x = q_21)[name = string("op_2108")]; + fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor var_2110 = mul(x = var_2108_1, y = const_29_promoted)[name = string("op_2110")]; + int32 var_2112 = const()[name = string("op_2112"), val = int32(-1)]; + bool var_2113_interleave_0 = const()[name = string("op_2113_interleave_0"), val = bool(false)]; + tensor var_2113 = concat(axis = var_2112, interleave = var_2113_interleave_0, values = (var_2110, var_2108_0))[name = string("op_2113")]; + tensor var_2114_cast_fp16 = mul(x = var_2113, y = sin_s)[name = string("op_2114_cast_fp16")]; + tensor input_73_cast_fp16 = add(x = var_2107_cast_fp16, y = var_2114_cast_fp16)[name = string("input_73_cast_fp16")]; + tensor k_padded_5_pad_0 = const()[name = string("k_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; + fp16 const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_5_cast_fp16 = pad(constant_val = const_30_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_73_cast_fp16)[name = string("k_padded_5_cast_fp16")]; + tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; + fp16 const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = fp16(0x0p+0)]; + tensor 
v_padded_5_cast_fp16 = pad(constant_val = const_31_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = input_75_cast_fp16)[name = string("v_padded_5_cast_fp16")]; + tensor var_2143_begin_0 = const()[name = string("op_2143_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2143_end_0 = const()[name = string("op_2143_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2143_end_mask_0 = const()[name = string("op_2143_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2143_cast_fp16 = slice_by_index(begin = var_2143_begin_0, end = var_2143_end_0, end_mask = var_2143_end_mask_0, x = K_sliding_slot_5_cast_fp16)[name = string("op_2143_cast_fp16")]; + int32 var_2150 = const()[name = string("op_2150"), val = int32(2)]; + bool K_sliding_out_5_interleave_0 = const()[name = string("K_sliding_out_5_interleave_0"), val = bool(false)]; + tensor K_sliding_out_5_cast_fp16 = concat(axis = var_2150, interleave = K_sliding_out_5_interleave_0, values = (var_2143_cast_fp16, k_padded_5_cast_fp16))[name = string("K_sliding_out_5_cast_fp16")]; + tensor var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2166_end_0 = const()[name = string("op_2166_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = var_2166_end_0, end_mask = var_2166_end_mask_0, x = V_sliding_slot_5_cast_fp16)[name = string("op_2166_cast_fp16")]; + int32 var_2173 = const()[name = string("op_2173"), val = int32(2)]; + bool V_sliding_out_5_interleave_0 = const()[name = string("V_sliding_out_5_interleave_0"), val = bool(false)]; + tensor V_sliding_out_5_cast_fp16 = concat(axis = var_2173, interleave = V_sliding_out_5_interleave_0, values = (var_2166_cast_fp16, v_padded_5_cast_fp16))[name = string("V_sliding_out_5_cast_fp16")]; + tensor K_for_attn_5_begin_0 = 
const()[name = string("K_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_5_end_0 = const()[name = string("K_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_5_end_mask_0 = const()[name = string("K_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_5_cast_fp16 = slice_by_index(begin = K_for_attn_5_begin_0, end = K_for_attn_5_end_0, end_mask = K_for_attn_5_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("K_for_attn_5_cast_fp16")]; + tensor V_for_attn_5_begin_0 = const()[name = string("V_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_5_end_0 = const()[name = string("V_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_5_end_mask_0 = const()[name = string("V_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_5_cast_fp16 = slice_by_index(begin = V_for_attn_5_begin_0, end = V_for_attn_5_end_0, end_mask = V_for_attn_5_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("V_for_attn_5_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_for_attn_5_cast_fp16)[name = string("transpose_175")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_9_cast_fp16 = transpose(perm = 
transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_174")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_for_attn_5_cast_fp16)[name = string("transpose_173")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_172")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_50_cast_fp16 = transpose(perm = transpose_50_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_171")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = 
attn_weights_9_transpose_y_0, x = q_23_cast_fp16, y = transpose_50_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_47_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_47_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_47_cast_fp16)[name = string("reduce_max_2")]; + tensor var_2214 = sub(x = x_47_cast_fp16, y = reduce_max_2)[name = string("op_2214")]; + tensor var_2220 = exp(x = var_2214)[name = string("op_2220")]; + tensor var_2230_axes_0 = const()[name = string("op_2230_axes_0"), val = tensor([-1])]; + bool var_2230_keep_dims_0 = const()[name = string("op_2230_keep_dims_0"), val = bool(true)]; + tensor var_2230 = reduce_sum(axes = var_2230_axes_0, keep_dims = var_2230_keep_dims_0, x = var_2220)[name = string("op_2230")]; + tensor var_2236_cast_fp16 = real_div(x = var_2220, y = var_2230)[name = string("op_2236_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_170")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_2236_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_2247 = const()[name = string("op_2247"), val = tensor([0, 2, 1, 3])]; + tensor var_2254 = const()[name = string("op_2254"), val = tensor([1, 1, -1])]; + tensor var_2248_cast_fp16 = transpose(perm = var_2247, x = 
attn_output_13_cast_fp16)[name = string("transpose_169")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2254, x = var_2248_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2259 = const()[name = string("op_2259"), val = tensor([0, 2, 1])]; + string var_2275_pad_type_0 = const()[name = string("op_2275_pad_type_0"), val = string("valid")]; + int32 var_2275_groups_0 = const()[name = string("op_2275_groups_0"), val = int32(1)]; + tensor var_2275_strides_0 = const()[name = string("op_2275_strides_0"), val = tensor([1])]; + tensor var_2275_pad_0 = const()[name = string("op_2275_pad_0"), val = tensor([0, 0])]; + tensor var_2275_dilations_0 = const()[name = string("op_2275_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550980416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553601920))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2260_cast_fp16 = transpose(perm = var_2259, x = attn_output_15_cast_fp16)[name = string("transpose_168")]; + tensor var_2275_cast_fp16 = conv(dilations = var_2275_dilations_0, groups = var_2275_groups_0, pad = var_2275_pad_0, pad_type = var_2275_pad_type_0, strides = var_2275_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2260_cast_fp16)[name = string("op_2275_cast_fp16")]; + tensor var_2279 = const()[name = string("op_2279"), val = tensor([0, 2, 1])]; + int32 var_2285 = const()[name = string("op_2285"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_51_cast_fp16 = transpose(perm = var_2279, x = var_2275_cast_fp16)[name = string("transpose_167")]; + tensor var_2287_cast_fp16 = mul(x = x_51_cast_fp16, y = const_32_promoted_to_fp16)[name = 
string("op_2287_cast_fp16")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79_cast_fp16 = concat(axis = var_2285, interleave = input_79_interleave_0, values = (x_51_cast_fp16, var_2287_cast_fp16))[name = string("input_79_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_2282_to_fp16 = const()[name = string("op_2282_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2282_to_fp16, x = input_79_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_2292_split_sizes_0 = const()[name = string("op_2292_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2292_axis_0 = const()[name = string("op_2292_axis_0"), val = int32(-1)]; + tensor var_2292_cast_fp16_0, tensor var_2292_cast_fp16_1 = split(axis = var_2292_axis_0, split_sizes = var_2292_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2292_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553604544)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_2292_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_39_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_2301 = const()[name = string("op_2301"), val = int32(-1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2303_cast_fp16 = mul(x = x_53_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2303_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor 
input_81_cast_fp16 = concat(axis = var_2301, interleave = input_81_interleave_0, values = (x_53_cast_fp16, var_2303_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_2298_to_fp16 = const()[name = string("op_2298_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2298_to_fp16, x = input_81_cast_fp16)[name = string("normed_77_cast_fp16")]; + tensor var_2308_split_sizes_0 = const()[name = string("op_2308_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2308_axis_0 = const()[name = string("op_2308_axis_0"), val = int32(-1)]; + tensor var_2308_cast_fp16_0, tensor var_2308_cast_fp16_1 = split(axis = var_2308_axis_0, split_sizes = var_2308_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2308_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553609728)))]; + tensor h_15_cast_fp16 = mul(x = var_2308_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_2319 = const()[name = string("op_2319"), val = tensor([0, 2, 1])]; + tensor input_83_axes_0 = const()[name = string("input_83_axes_0"), val = tensor([2])]; + tensor var_2320 = transpose(perm = var_2319, x = h_15_cast_fp16)[name = string("transpose_166")]; + tensor input_83 = expand_dims(axes = input_83_axes_0, x = var_2320)[name = string("input_83")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name 
= string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_83)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_83)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_85 = mul(x = gate_11, y = up_5)[name = string("input_85")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = 
mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_85)[name = string("mlp_out_5")]; + tensor var_2360_axes_0 = const()[name = string("op_2360_axes_0"), val = tensor([2])]; + tensor var_2360 = squeeze(axes = var_2360_axes_0, x = mlp_out_5)[name = string("op_2360")]; + tensor var_2364 = const()[name = string("op_2364"), val = tensor([0, 2, 1])]; + int32 var_2370 = const()[name = string("op_2370"), val = int32(-1)]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor x_55 = transpose(perm = var_2364, x = var_2360)[name = string("transpose_165")]; + tensor var_2372 = mul(x = x_55, y = const_34_promoted)[name = string("op_2372")]; + bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; + tensor input_87 = concat(axis = var_2370, interleave = input_87_interleave_0, values = (x_55, var_2372))[name = string("input_87")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_2367_to_fp16 = const()[name = string("op_2367_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2367_to_fp16, x = input_87)[name = string("normed_81_cast_fp16")]; + tensor var_2377_split_sizes_0 = const()[name = string("op_2377_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2377_axis_0 = const()[name = string("op_2377_axis_0"), val = int32(-1)]; + tensor var_2377_0, tensor var_2377_1 = split(axis = var_2377_axis_0, split_sizes = var_2377_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2377")]; + tensor hidden_states_23 = mul(x = var_2377_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = 
tensor([0, 0, 512])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 768])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_2405 = const()[name = string("op_2405"), val = tensor([0, 2, 1])]; + tensor input_89_axes_0 = const()[name = string("input_89_axes_0"), val = tensor([2])]; + tensor var_2406 = transpose(perm = var_2405, x = hidden_states_25_cast_fp16)[name = string("transpose_164")]; + tensor input_89 = expand_dims(axes = input_89_axes_0, x = var_2406)[name = string("input_89")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_89)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_2425 = const()[name = string("op_2425"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = 
string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_2426_cast_fp16 = transpose(perm = var_2425, x = per_layer_slice_5_cast_fp16)[name = string("transpose_163")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_2426_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_91_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_91_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553614912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553942656))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_91_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_2442_axes_0 = const()[name = string("op_2442_axes_0"), val = tensor([2])]; + tensor var_2442_cast_fp16 = squeeze(axes = var_2442_axes_0, x = gated_17_cast_fp16)[name = string("op_2442_cast_fp16")]; + tensor var_2446 = const()[name = string("op_2446"), val = tensor([0, 2, 1])]; + int32 var_2452 = const()[name = 
string("op_2452"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_57_cast_fp16 = transpose(perm = var_2446, x = var_2442_cast_fp16)[name = string("transpose_162")]; + tensor var_2454_cast_fp16 = mul(x = x_57_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2454_cast_fp16")]; + bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; + tensor input_93_cast_fp16 = concat(axis = var_2452, interleave = input_93_interleave_0, values = (x_57_cast_fp16, var_2454_cast_fp16))[name = string("input_93_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_2449_to_fp16 = const()[name = string("op_2449_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2449_to_fp16, x = input_93_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_2459_split_sizes_0 = const()[name = string("op_2459_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2459_axis_0 = const()[name = string("op_2459_axis_0"), val = int32(-1)]; + tensor var_2459_cast_fp16_0, tensor var_2459_cast_fp16_1 = split(axis = var_2459_axis_0, split_sizes = var_2459_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2459_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553945280)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_2459_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor 
const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = tensor([0x1.aep-1])]; + tensor x_59_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_36_promoted_to_fp16)[name = string("x_59_cast_fp16")]; + tensor var_2471_axes_0 = const()[name = string("op_2471_axes_0"), val = tensor([0])]; + tensor var_2471_cast_fp16 = squeeze(axes = var_2471_axes_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2471_cast_fp16")]; + tensor var_2473_axes_0 = const()[name = string("op_2473_axes_0"), val = tensor([0])]; + tensor var_2473_cast_fp16 = squeeze(axes = var_2473_axes_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2473_cast_fp16")]; + tensor var_2476_begin_0 = const()[name = string("op_2476_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2476_end_0 = const()[name = string("op_2476_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2476_end_mask_0 = const()[name = string("op_2476_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2476_squeeze_mask_0 = const()[name = string("op_2476_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2476_cast_fp16 = slice_by_index(begin = var_2476_begin_0, end = var_2476_end_0, end_mask = var_2476_end_mask_0, squeeze_mask = var_2476_squeeze_mask_0, x = K_sliding_in)[name = string("op_2476_cast_fp16")]; + tensor K_sliding_slot_7_axes_0 = const()[name = string("K_sliding_slot_7_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_7_cast_fp16 = expand_dims(axes = K_sliding_slot_7_axes_0, x = var_2476_cast_fp16)[name = string("K_sliding_slot_7_cast_fp16")]; + tensor var_2481_begin_0 = const()[name = string("op_2481_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2481_end_0 = const()[name = string("op_2481_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2481_end_mask_0 = const()[name = string("op_2481_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2481_squeeze_mask_0 = const()[name = string("op_2481_squeeze_mask_0"), val = 
tensor([true, false, false, false])]; + tensor var_2481_cast_fp16 = slice_by_index(begin = var_2481_begin_0, end = var_2481_end_0, end_mask = var_2481_end_mask_0, squeeze_mask = var_2481_squeeze_mask_0, x = V_sliding_in)[name = string("op_2481_cast_fp16")]; + tensor V_sliding_slot_7_axes_0 = const()[name = string("V_sliding_slot_7_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_7_cast_fp16 = expand_dims(axes = V_sliding_slot_7_axes_0, x = var_2481_cast_fp16)[name = string("V_sliding_slot_7_cast_fp16")]; + int32 var_2488 = const()[name = string("op_2488"), val = int32(-1)]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2490_cast_fp16 = mul(x = x_59_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_2490_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95_cast_fp16 = concat(axis = var_2488, interleave = input_95_interleave_0, values = (x_59_cast_fp16, var_2490_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_2485_to_fp16 = const()[name = string("op_2485_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2485_to_fp16, x = input_95_cast_fp16)[name = string("normed_89_cast_fp16")]; + tensor var_2495_split_sizes_0 = const()[name = string("op_2495_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2495_axis_0 = const()[name = string("op_2495_axis_0"), val = int32(-1)]; + tensor var_2495_cast_fp16_0, tensor var_2495_cast_fp16_1 = split(axis = var_2495_axis_0, split_sizes = var_2495_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2495_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(553950464)))]; + tensor h_19_cast_fp16 = mul(x = var_2495_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_2501 = const()[name = string("op_2501"), val = tensor([0, 2, 1])]; + tensor var_2504_axes_0 = const()[name = string("op_2504_axes_0"), val = tensor([2])]; + tensor var_2502_cast_fp16 = transpose(perm = var_2501, x = h_19_cast_fp16)[name = string("transpose_161")]; + tensor var_2504_cast_fp16 = expand_dims(axes = var_2504_axes_0, x = var_2502_cast_fp16)[name = string("op_2504_cast_fp16")]; + string var_2520_pad_type_0 = const()[name = string("op_2520_pad_type_0"), val = string("valid")]; + tensor var_2520_strides_0 = const()[name = string("op_2520_strides_0"), val = tensor([1, 1])]; + tensor var_2520_pad_0 = const()[name = string("op_2520_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2520_dilations_0 = const()[name = string("op_2520_dilations_0"), val = tensor([1, 1])]; + int32 var_2520_groups_0 = const()[name = string("op_2520_groups_0"), val = int32(1)]; + tensor var_2520 = conv(dilations = var_2520_dilations_0, groups = var_2520_groups_0, pad = var_2520_pad_0, pad_type = var_2520_pad_type_0, strides = var_2520_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_2504_cast_fp16)[name = string("op_2520")]; + tensor var_2525 = const()[name = string("op_2525"), val = tensor([1, 8, 256, 1])]; + tensor var_2526 = reshape(shape = var_2525, x = var_2520)[name = string("op_2526")]; + tensor var_2531 = const()[name = string("op_2531"), val = tensor([0, 1, 3, 2])]; + tensor var_2541 = const()[name = string("op_2541"), val = tensor([1, 8, 256])]; + tensor var_2532 = transpose(perm = var_2531, x = var_2526)[name = string("transpose_160")]; + tensor x_61 = reshape(shape = var_2541, x = var_2532)[name = string("x_61")]; + int32 var_2547 = const()[name = string("op_2547"), val = int32(-1)]; + fp16 const_38_promoted = 
const()[name = string("const_38_promoted"), val = fp16(-0x1p+0)]; + tensor var_2549 = mul(x = x_61, y = const_38_promoted)[name = string("op_2549")]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99 = concat(axis = var_2547, interleave = input_99_interleave_0, values = (x_61, var_2549))[name = string("input_99")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_2544_to_fp16 = const()[name = string("op_2544_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2544_to_fp16, x = input_99)[name = string("normed_93_cast_fp16")]; + tensor var_2554_split_sizes_0 = const()[name = string("op_2554_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2554_axis_0 = const()[name = string("op_2554_axis_0"), val = int32(-1)]; + tensor var_2554_0, tensor var_2554_1 = split(axis = var_2554_axis_0, split_sizes = var_2554_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2554")]; + tensor var_2556 = mul(x = var_2554_0, y = layers_3_self_attn_q_norm_weight)[name = string("op_2556")]; + tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 8, 1, 256])]; + tensor q_27 = reshape(shape = var_2561, x = var_2556)[name = string("q_27")]; + tensor var_2563_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2563_cast_fp16")]; + tensor var_2564_split_sizes_0 = const()[name = string("op_2564_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2564_axis_0 = const()[name = string("op_2564_axis_0"), val = int32(-1)]; + tensor var_2564_0, tensor var_2564_1 = split(axis = var_2564_axis_0, split_sizes = var_2564_split_sizes_0, x = q_27)[name = string("op_2564")]; + fp16 const_39_promoted = const()[name = string("const_39_promoted"), val = fp16(-0x1p+0)]; + tensor var_2566 = mul(x = var_2564_1, y = const_39_promoted)[name = string("op_2566")]; + int32 var_2568 = const()[name = 
string("op_2568"), val = int32(-1)]; + bool var_2569_interleave_0 = const()[name = string("op_2569_interleave_0"), val = bool(false)]; + tensor var_2569 = concat(axis = var_2568, interleave = var_2569_interleave_0, values = (var_2566, var_2564_0))[name = string("op_2569")]; + tensor var_2570_cast_fp16 = mul(x = var_2569, y = sin_s)[name = string("op_2570_cast_fp16")]; + tensor q_31_cast_fp16 = add(x = var_2563_cast_fp16, y = var_2570_cast_fp16)[name = string("q_31_cast_fp16")]; + string var_2583_pad_type_0 = const()[name = string("op_2583_pad_type_0"), val = string("valid")]; + tensor var_2583_strides_0 = const()[name = string("op_2583_strides_0"), val = tensor([1, 1])]; + tensor var_2583_pad_0 = const()[name = string("op_2583_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2583_dilations_0 = const()[name = string("op_2583_dilations_0"), val = tensor([1, 1])]; + int32 var_2583_groups_0 = const()[name = string("op_2583_groups_0"), val = int32(1)]; + tensor var_2583 = conv(dilations = var_2583_dilations_0, groups = var_2583_groups_0, pad = var_2583_pad_0, pad_type = var_2583_pad_type_0, strides = var_2583_strides_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = var_2504_cast_fp16)[name = string("op_2583")]; + tensor var_2588 = const()[name = string("op_2588"), val = tensor([1, 2, 256, 1])]; + tensor var_2589 = reshape(shape = var_2588, x = var_2583)[name = string("op_2589")]; + tensor var_2594 = const()[name = string("op_2594"), val = tensor([0, 1, 3, 2])]; + string var_2611_pad_type_0 = const()[name = string("op_2611_pad_type_0"), val = string("valid")]; + tensor var_2611_strides_0 = const()[name = string("op_2611_strides_0"), val = tensor([1, 1])]; + tensor var_2611_pad_0 = const()[name = string("op_2611_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2611_dilations_0 = const()[name = string("op_2611_dilations_0"), val = tensor([1, 1])]; + int32 var_2611_groups_0 = const()[name = string("op_2611_groups_0"), val = int32(1)]; + tensor var_2611 = 
conv(dilations = var_2611_dilations_0, groups = var_2611_groups_0, pad = var_2611_pad_0, pad_type = var_2611_pad_type_0, strides = var_2611_strides_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = var_2504_cast_fp16)[name = string("op_2611")]; + tensor var_2616 = const()[name = string("op_2616"), val = tensor([1, 2, 256, 1])]; + tensor var_2617 = reshape(shape = var_2616, x = var_2611)[name = string("op_2617")]; + tensor var_2622 = const()[name = string("op_2622"), val = tensor([0, 1, 3, 2])]; + tensor var_2632 = const()[name = string("op_2632"), val = tensor([1, 2, 256])]; + tensor var_2595 = transpose(perm = var_2594, x = var_2589)[name = string("transpose_159")]; + tensor x_63 = reshape(shape = var_2632, x = var_2595)[name = string("x_63")]; + int32 var_2638 = const()[name = string("op_2638"), val = int32(-1)]; + fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)]; + tensor var_2640 = mul(x = x_63, y = const_40_promoted)[name = string("op_2640")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2638, interleave = input_101_interleave_0, values = (x_63, var_2640))[name = string("input_101")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2635_to_fp16, x = input_101)[name = string("normed_97_cast_fp16")]; + tensor var_2645_split_sizes_0 = const()[name = string("op_2645_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2645_axis_0 = const()[name = string("op_2645_axis_0"), val = int32(-1)]; + tensor var_2645_0, tensor var_2645_1 = split(axis = var_2645_axis_0, split_sizes = var_2645_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2645")]; + tensor var_2647 = mul(x = var_2645_0, y = 
layers_3_self_attn_k_norm_weight)[name = string("op_2647")]; + tensor var_2652 = const()[name = string("op_2652"), val = tensor([1, 2, 1, 256])]; + tensor q_29 = reshape(shape = var_2652, x = var_2647)[name = string("q_29")]; + fp16 var_2654_promoted = const()[name = string("op_2654_promoted"), val = fp16(0x1p+1)]; + tensor var_2623 = transpose(perm = var_2622, x = var_2617)[name = string("transpose_158")]; + tensor var_2655 = pow(x = var_2623, y = var_2654_promoted)[name = string("op_2655")]; + tensor var_2660_axes_0 = const()[name = string("op_2660_axes_0"), val = tensor([-1])]; + bool var_2660_keep_dims_0 = const()[name = string("op_2660_keep_dims_0"), val = bool(true)]; + tensor var_2660 = reduce_mean(axes = var_2660_axes_0, keep_dims = var_2660_keep_dims_0, x = var_2655)[name = string("op_2660")]; + fp16 var_2662_to_fp16 = const()[name = string("op_2662_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_7_cast_fp16 = add(x = var_2660, y = var_2662_to_fp16)[name = string("mean_sq_7_cast_fp16")]; + fp32 var_2664_epsilon_0 = const()[name = string("op_2664_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2664_cast_fp16 = rsqrt(epsilon = var_2664_epsilon_0, x = mean_sq_7_cast_fp16)[name = string("op_2664_cast_fp16")]; + tensor input_105_cast_fp16 = mul(x = var_2623, y = var_2664_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_2666_cast_fp16 = mul(x = q_29, y = cos_s)[name = string("op_2666_cast_fp16")]; + tensor var_2667_split_sizes_0 = const()[name = string("op_2667_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2667_axis_0 = const()[name = string("op_2667_axis_0"), val = int32(-1)]; + tensor var_2667_0, tensor var_2667_1 = split(axis = var_2667_axis_0, split_sizes = var_2667_split_sizes_0, x = q_29)[name = string("op_2667")]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2669 = mul(x = var_2667_1, y = const_41_promoted)[name = string("op_2669")]; + int32 var_2671 = 
const()[name = string("op_2671"), val = int32(-1)]; + bool var_2672_interleave_0 = const()[name = string("op_2672_interleave_0"), val = bool(false)]; + tensor var_2672 = concat(axis = var_2671, interleave = var_2672_interleave_0, values = (var_2669, var_2667_0))[name = string("op_2672")]; + tensor var_2673_cast_fp16 = mul(x = var_2672, y = sin_s)[name = string("op_2673_cast_fp16")]; + tensor input_103_cast_fp16 = add(x = var_2666_cast_fp16, y = var_2673_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; + fp16 const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_7_cast_fp16 = pad(constant_val = const_42_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = input_103_cast_fp16)[name = string("k_padded_7_cast_fp16")]; + tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; + fp16 const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_7_cast_fp16 = pad(constant_val = const_43_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = input_105_cast_fp16)[name = string("v_padded_7_cast_fp16")]; + tensor var_2702_begin_0 = const()[name = string("op_2702_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2702_end_0 = const()[name = string("op_2702_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2702_end_mask_0 = const()[name = string("op_2702_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2702_cast_fp16 = slice_by_index(begin = var_2702_begin_0, end = var_2702_end_0, end_mask = var_2702_end_mask_0, x = K_sliding_slot_7_cast_fp16)[name = string("op_2702_cast_fp16")]; + 
int32 var_2709 = const()[name = string("op_2709"), val = int32(2)]; + bool K_sliding_out_7_interleave_0 = const()[name = string("K_sliding_out_7_interleave_0"), val = bool(false)]; + tensor K_sliding_out_7_cast_fp16 = concat(axis = var_2709, interleave = K_sliding_out_7_interleave_0, values = (var_2702_cast_fp16, k_padded_7_cast_fp16))[name = string("K_sliding_out_7_cast_fp16")]; + tensor var_2725_begin_0 = const()[name = string("op_2725_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2725_end_0 = const()[name = string("op_2725_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2725_end_mask_0 = const()[name = string("op_2725_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2725_cast_fp16 = slice_by_index(begin = var_2725_begin_0, end = var_2725_end_0, end_mask = var_2725_end_mask_0, x = V_sliding_slot_7_cast_fp16)[name = string("op_2725_cast_fp16")]; + int32 var_2732 = const()[name = string("op_2732"), val = int32(2)]; + bool V_sliding_out_7_interleave_0 = const()[name = string("V_sliding_out_7_interleave_0"), val = bool(false)]; + tensor V_sliding_out_7_cast_fp16 = concat(axis = var_2732, interleave = V_sliding_out_7_interleave_0, values = (var_2725_cast_fp16, v_padded_7_cast_fp16))[name = string("V_sliding_out_7_cast_fp16")]; + tensor K_for_attn_7_begin_0 = const()[name = string("K_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_7_end_0 = const()[name = string("K_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_7_end_mask_0 = const()[name = string("K_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_7_cast_fp16 = slice_by_index(begin = K_for_attn_7_begin_0, end = K_for_attn_7_end_0, end_mask = K_for_attn_7_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("K_for_attn_7_cast_fp16")]; + tensor V_for_attn_7_begin_0 = const()[name = string("V_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_7_end_0 = const()[name = 
string("V_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_7_end_mask_0 = const()[name = string("V_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_7_cast_fp16 = slice_by_index(begin = V_for_attn_7_begin_0, end = V_for_attn_7_end_0, end_mask = V_for_attn_7_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("V_for_attn_7_cast_fp16")]; + tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_for_attn_7_cast_fp16)[name = string("transpose_157")]; + tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_12, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_156")]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_13, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; + tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_for_attn_7_cast_fp16)[name = string("transpose_155")]; + tensor tile_7_cast_fp16 = 
tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_14, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_154")]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_15, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor V_expanded_7_perm_0 = const()[name = string("V_expanded_7_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor transpose_51_cast_fp16 = transpose(perm = transpose_51_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_153")]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_31_cast_fp16, y = transpose_51_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_3")]; + tensor var_2773 = sub(x = x_67_cast_fp16, y = 
reduce_max_3)[name = string("op_2773")]; + tensor var_2779 = exp(x = var_2773)[name = string("op_2779")]; + tensor var_2789_axes_0 = const()[name = string("op_2789_axes_0"), val = tensor([-1])]; + bool var_2789_keep_dims_0 = const()[name = string("op_2789_keep_dims_0"), val = bool(true)]; + tensor var_2789 = reduce_sum(axes = var_2789_axes_0, keep_dims = var_2789_keep_dims_0, x = var_2779)[name = string("op_2789")]; + tensor var_2795_cast_fp16 = real_div(x = var_2779, y = var_2789)[name = string("op_2795_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor V_expanded_7_cast_fp16 = transpose(perm = V_expanded_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_152")]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_2795_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_2806 = const()[name = string("op_2806"), val = tensor([0, 2, 1, 3])]; + tensor var_2813 = const()[name = string("op_2813"), val = tensor([1, 1, -1])]; + tensor var_2807_cast_fp16 = transpose(perm = var_2806, x = attn_output_19_cast_fp16)[name = string("transpose_151")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_2813, x = var_2807_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_2818 = const()[name = string("op_2818"), val = tensor([0, 2, 1])]; + string var_2834_pad_type_0 = const()[name = string("op_2834_pad_type_0"), val = string("valid")]; + int32 var_2834_groups_0 = const()[name = string("op_2834_groups_0"), val = int32(1)]; + tensor var_2834_strides_0 = const()[name = string("op_2834_strides_0"), val = tensor([1])]; + tensor var_2834_pad_0 = const()[name = string("op_2834_pad_0"), val = tensor([0, 0])]; + tensor 
var_2834_dilations_0 = const()[name = string("op_2834_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553955648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556577152))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2819_cast_fp16 = transpose(perm = var_2818, x = attn_output_21_cast_fp16)[name = string("transpose_150")]; + tensor var_2834_cast_fp16 = conv(dilations = var_2834_dilations_0, groups = var_2834_groups_0, pad = var_2834_pad_0, pad_type = var_2834_pad_type_0, strides = var_2834_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_2819_cast_fp16)[name = string("op_2834_cast_fp16")]; + tensor var_2838 = const()[name = string("op_2838"), val = tensor([0, 2, 1])]; + int32 var_2844 = const()[name = string("op_2844"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2838, x = var_2834_cast_fp16)[name = string("transpose_149")]; + tensor var_2846_cast_fp16 = mul(x = x_71_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2846_cast_fp16")]; + bool input_109_interleave_0 = const()[name = string("input_109_interleave_0"), val = bool(false)]; + tensor input_109_cast_fp16 = concat(axis = var_2844, interleave = input_109_interleave_0, values = (x_71_cast_fp16, var_2846_cast_fp16))[name = string("input_109_cast_fp16")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2841_to_fp16 = const()[name = string("op_2841_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2841_to_fp16, x = input_109_cast_fp16)[name = string("normed_101_cast_fp16")]; + tensor 
var_2851_split_sizes_0 = const()[name = string("op_2851_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2851_axis_0 = const()[name = string("op_2851_axis_0"), val = int32(-1)]; + tensor var_2851_cast_fp16_0, tensor var_2851_cast_fp16_1 = split(axis = var_2851_axis_0, split_sizes = var_2851_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2851_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556579776)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_2851_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_59_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2860 = const()[name = string("op_2860"), val = int32(-1)]; + fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2862_cast_fp16 = mul(x = x_73_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_2862_cast_fp16")]; + bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; + tensor input_111_cast_fp16 = concat(axis = var_2860, interleave = input_111_interleave_0, values = (x_73_cast_fp16, var_2862_cast_fp16))[name = string("input_111_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2857_to_fp16 = const()[name = string("op_2857_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2857_to_fp16, x = input_111_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor var_2867_split_sizes_0 = const()[name = string("op_2867_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2867_axis_0 = const()[name 
= string("op_2867_axis_0"), val = int32(-1)]; + tensor var_2867_cast_fp16_0, tensor var_2867_cast_fp16_1 = split(axis = var_2867_axis_0, split_sizes = var_2867_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2867_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556584960)))]; + tensor h_21_cast_fp16 = mul(x = var_2867_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_2878 = const()[name = string("op_2878"), val = tensor([0, 2, 1])]; + tensor input_113_axes_0 = const()[name = string("input_113_axes_0"), val = tensor([2])]; + tensor var_2879 = transpose(perm = var_2878, x = h_21_cast_fp16)[name = string("transpose_148")]; + tensor input_113 = expand_dims(axes = input_113_axes_0, x = var_2879)[name = string("input_113")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_113)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), 
val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_113)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_115 = mul(x = gate_15, y = up_7)[name = string("input_115")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_115)[name = string("mlp_out_7")]; + tensor var_2919_axes_0 = const()[name = string("op_2919_axes_0"), val = tensor([2])]; + tensor var_2919 = squeeze(axes = var_2919_axes_0, x = mlp_out_7)[name = string("op_2919")]; + tensor var_2923 = const()[name = string("op_2923"), val = tensor([0, 2, 1])]; + int32 var_2929 = const()[name = string("op_2929"), val = int32(-1)]; + fp16 const_46_promoted = const()[name = string("const_46_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2923, x = var_2919)[name = string("transpose_147")]; + tensor 
var_2931 = mul(x = x_75, y = const_46_promoted)[name = string("op_2931")]; + bool input_117_interleave_0 = const()[name = string("input_117_interleave_0"), val = bool(false)]; + tensor input_117 = concat(axis = var_2929, interleave = input_117_interleave_0, values = (x_75, var_2931))[name = string("input_117")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2926_to_fp16 = const()[name = string("op_2926_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2926_to_fp16, x = input_117)[name = string("normed_109_cast_fp16")]; + tensor var_2936_split_sizes_0 = const()[name = string("op_2936_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2936_axis_0 = const()[name = string("op_2936_axis_0"), val = int32(-1)]; + tensor var_2936_0, tensor var_2936_1 = split(axis = var_2936_axis_0, split_sizes = var_2936_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2936")]; + tensor hidden_states_33 = mul(x = var_2936_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 768])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 1024])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_2964 = const()[name = string("op_2964"), val = tensor([0, 2, 1])]; + tensor input_119_axes_0 = const()[name = 
string("input_119_axes_0"), val = tensor([2])]; + tensor var_2965 = transpose(perm = var_2964, x = hidden_states_35_cast_fp16)[name = string("transpose_146")]; + tensor input_119 = expand_dims(axes = input_119_axes_0, x = var_2965)[name = string("input_119")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_119)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_2984 = const()[name = string("op_2984"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_2985_cast_fp16 = transpose(perm = var_2984, x = per_layer_slice_7_cast_fp16)[name = string("transpose_145")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_2985_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_121_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_121_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = 
string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556590144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556917888))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_121_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_3001_axes_0 = const()[name = string("op_3001_axes_0"), val = tensor([2])]; + tensor var_3001_cast_fp16 = squeeze(axes = var_3001_axes_0, x = gated_23_cast_fp16)[name = string("op_3001_cast_fp16")]; + tensor var_3005 = const()[name = string("op_3005"), val = tensor([0, 2, 1])]; + int32 var_3011 = const()[name = string("op_3011"), val = int32(-1)]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_3005, x = var_3001_cast_fp16)[name = string("transpose_144")]; + tensor var_3013_cast_fp16 = mul(x = x_77_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_3013_cast_fp16")]; + bool input_123_interleave_0 = const()[name = string("input_123_interleave_0"), val = bool(false)]; + tensor input_123_cast_fp16 = concat(axis = var_3011, interleave = input_123_interleave_0, values = (x_77_cast_fp16, var_3013_cast_fp16))[name = 
string("input_123_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_3008_to_fp16 = const()[name = string("op_3008_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_3008_to_fp16, x = input_123_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor var_3018_split_sizes_0 = const()[name = string("op_3018_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3018_axis_0 = const()[name = string("op_3018_axis_0"), val = int32(-1)]; + tensor var_3018_cast_fp16_0, tensor var_3018_cast_fp16_1 = split(axis = var_3018_axis_0, split_sizes = var_3018_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_3018_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556920512)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_3018_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_48_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + tensor var_3030_axes_0 = const()[name = string("op_3030_axes_0"), val = tensor([0])]; + tensor var_3030_cast_fp16 = squeeze(axes = var_3030_axes_0, x = K_sliding_out_7_cast_fp16)[name = string("op_3030_cast_fp16")]; + tensor var_3032_axes_0 = const()[name = string("op_3032_axes_0"), val = tensor([0])]; + tensor var_3032_cast_fp16 = squeeze(axes = var_3032_axes_0, x = 
V_sliding_out_7_cast_fp16)[name = string("op_3032_cast_fp16")]; + tensor var_3035_begin_0 = const()[name = string("op_3035_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3035_end_0 = const()[name = string("op_3035_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3035_end_mask_0 = const()[name = string("op_3035_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3035_squeeze_mask_0 = const()[name = string("op_3035_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3035_cast_fp16 = slice_by_index(begin = var_3035_begin_0, end = var_3035_end_0, end_mask = var_3035_end_mask_0, squeeze_mask = var_3035_squeeze_mask_0, x = K_sliding_in)[name = string("op_3035_cast_fp16")]; + tensor K_sliding_slot_9_axes_0 = const()[name = string("K_sliding_slot_9_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_9_cast_fp16 = expand_dims(axes = K_sliding_slot_9_axes_0, x = var_3035_cast_fp16)[name = string("K_sliding_slot_9_cast_fp16")]; + tensor var_3040_begin_0 = const()[name = string("op_3040_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3040_end_0 = const()[name = string("op_3040_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3040_end_mask_0 = const()[name = string("op_3040_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3040_squeeze_mask_0 = const()[name = string("op_3040_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3040_cast_fp16 = slice_by_index(begin = var_3040_begin_0, end = var_3040_end_0, end_mask = var_3040_end_mask_0, squeeze_mask = var_3040_squeeze_mask_0, x = V_sliding_in)[name = string("op_3040_cast_fp16")]; + tensor V_sliding_slot_9_axes_0 = const()[name = string("V_sliding_slot_9_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_9_cast_fp16 = expand_dims(axes = V_sliding_slot_9_axes_0, x = var_3040_cast_fp16)[name = string("V_sliding_slot_9_cast_fp16")]; + int32 var_3047 = const()[name = string("op_3047"), val = int32(-1)]; + fp16 
const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3049_cast_fp16 = mul(x = x_79_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_3049_cast_fp16")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125_cast_fp16 = concat(axis = var_3047, interleave = input_125_interleave_0, values = (x_79_cast_fp16, var_3049_cast_fp16))[name = string("input_125_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_3044_to_fp16 = const()[name = string("op_3044_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_3044_to_fp16, x = input_125_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_3054_split_sizes_0 = const()[name = string("op_3054_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3054_axis_0 = const()[name = string("op_3054_axis_0"), val = int32(-1)]; + tensor var_3054_cast_fp16_0, tensor var_3054_cast_fp16_1 = split(axis = var_3054_axis_0, split_sizes = var_3054_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_3054_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556925696)))]; + tensor h_25_cast_fp16 = mul(x = var_3054_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_3060 = const()[name = string("op_3060"), val = tensor([0, 2, 1])]; + tensor var_3063_axes_0 = const()[name = string("op_3063_axes_0"), val = tensor([2])]; + tensor var_3061_cast_fp16 = transpose(perm = var_3060, x = h_25_cast_fp16)[name = string("transpose_143")]; + tensor var_3063_cast_fp16 = expand_dims(axes = var_3063_axes_0, x = var_3061_cast_fp16)[name = 
string("op_3063_cast_fp16")]; + string var_3079_pad_type_0 = const()[name = string("op_3079_pad_type_0"), val = string("valid")]; + tensor var_3079_strides_0 = const()[name = string("op_3079_strides_0"), val = tensor([1, 1])]; + tensor var_3079_pad_0 = const()[name = string("op_3079_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3079_dilations_0 = const()[name = string("op_3079_dilations_0"), val = tensor([1, 1])]; + int32 var_3079_groups_0 = const()[name = string("op_3079_groups_0"), val = int32(1)]; + tensor var_3079 = conv(dilations = var_3079_dilations_0, groups = var_3079_groups_0, pad = var_3079_pad_0, pad_type = var_3079_pad_type_0, strides = var_3079_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_3063_cast_fp16)[name = string("op_3079")]; + tensor var_3084 = const()[name = string("op_3084"), val = tensor([1, 8, 256, 1])]; + tensor var_3085 = reshape(shape = var_3084, x = var_3079)[name = string("op_3085")]; + tensor var_3090 = const()[name = string("op_3090"), val = tensor([0, 1, 3, 2])]; + tensor var_3100 = const()[name = string("op_3100"), val = tensor([1, 8, 256])]; + tensor var_3091 = transpose(perm = var_3090, x = var_3085)[name = string("transpose_142")]; + tensor x_81 = reshape(shape = var_3100, x = var_3091)[name = string("x_81")]; + int32 var_3106 = const()[name = string("op_3106"), val = int32(-1)]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_3108 = mul(x = x_81, y = const_50_promoted)[name = string("op_3108")]; + bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129 = concat(axis = var_3106, interleave = input_129_interleave_0, values = (x_81, var_3108))[name = string("input_129")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_3103_to_fp16 = const()[name = string("op_3103_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_3103_to_fp16, x = input_129)[name = string("normed_121_cast_fp16")]; + tensor var_3113_split_sizes_0 = const()[name = string("op_3113_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3113_axis_0 = const()[name = string("op_3113_axis_0"), val = int32(-1)]; + tensor var_3113_0, tensor var_3113_1 = split(axis = var_3113_axis_0, split_sizes = var_3113_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_3113")]; + tensor var_3115 = mul(x = var_3113_0, y = layers_4_self_attn_q_norm_weight)[name = string("op_3115")]; + tensor var_3120 = const()[name = string("op_3120"), val = tensor([1, 8, 1, 256])]; + tensor q_35 = reshape(shape = var_3120, x = var_3115)[name = string("q_35")]; + tensor var_3122_cast_fp16 = mul(x = q_35, y = cos_s)[name = string("op_3122_cast_fp16")]; + tensor var_3123_split_sizes_0 = const()[name = string("op_3123_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3123_axis_0 = const()[name = string("op_3123_axis_0"), val = int32(-1)]; + tensor var_3123_0, tensor var_3123_1 = split(axis = var_3123_axis_0, split_sizes = var_3123_split_sizes_0, x = q_35)[name = string("op_3123")]; + fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; + tensor var_3125 = mul(x = var_3123_1, y = const_51_promoted)[name = string("op_3125")]; + int32 var_3127 = const()[name = string("op_3127"), val = int32(-1)]; + bool var_3128_interleave_0 = const()[name = string("op_3128_interleave_0"), val = bool(false)]; + tensor var_3128 = concat(axis = var_3127, interleave = var_3128_interleave_0, values = (var_3125, var_3123_0))[name = string("op_3128")]; + tensor var_3129_cast_fp16 = mul(x = var_3128, y = sin_s)[name = string("op_3129_cast_fp16")]; + tensor q_39_cast_fp16 = add(x = var_3122_cast_fp16, y = var_3129_cast_fp16)[name = string("q_39_cast_fp16")]; + string var_3142_pad_type_0 = const()[name = string("op_3142_pad_type_0"), val = 
string("valid")]; + tensor var_3142_strides_0 = const()[name = string("op_3142_strides_0"), val = tensor([1, 1])]; + tensor var_3142_pad_0 = const()[name = string("op_3142_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3142_dilations_0 = const()[name = string("op_3142_dilations_0"), val = tensor([1, 1])]; + int32 var_3142_groups_0 = const()[name = string("op_3142_groups_0"), val = int32(1)]; + tensor var_3142 = conv(dilations = var_3142_dilations_0, groups = var_3142_groups_0, pad = var_3142_pad_0, pad_type = var_3142_pad_type_0, strides = var_3142_strides_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = var_3063_cast_fp16)[name = string("op_3142")]; + tensor var_3147 = const()[name = string("op_3147"), val = tensor([1, 2, 256, 1])]; + tensor var_3148 = reshape(shape = var_3147, x = var_3142)[name = string("op_3148")]; + tensor var_3153 = const()[name = string("op_3153"), val = tensor([0, 1, 3, 2])]; + string var_3170_pad_type_0 = const()[name = string("op_3170_pad_type_0"), val = string("valid")]; + tensor var_3170_strides_0 = const()[name = string("op_3170_strides_0"), val = tensor([1, 1])]; + tensor var_3170_pad_0 = const()[name = string("op_3170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3170_dilations_0 = const()[name = string("op_3170_dilations_0"), val = tensor([1, 1])]; + int32 var_3170_groups_0 = const()[name = string("op_3170_groups_0"), val = int32(1)]; + tensor var_3170 = conv(dilations = var_3170_dilations_0, groups = var_3170_groups_0, pad = var_3170_pad_0, pad_type = var_3170_pad_type_0, strides = var_3170_strides_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = var_3063_cast_fp16)[name = string("op_3170")]; + tensor var_3175 = const()[name = string("op_3175"), val = tensor([1, 2, 256, 1])]; + tensor var_3176 = reshape(shape = var_3175, x = var_3170)[name = string("op_3176")]; + tensor var_3181 = const()[name = string("op_3181"), val = tensor([0, 1, 3, 2])]; + tensor var_3191 = const()[name = string("op_3191"), val 
= tensor([1, 2, 256])]; + tensor var_3154 = transpose(perm = var_3153, x = var_3148)[name = string("transpose_141")]; + tensor x_83 = reshape(shape = var_3191, x = var_3154)[name = string("x_83")]; + int32 var_3197 = const()[name = string("op_3197"), val = int32(-1)]; + fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; + tensor var_3199 = mul(x = x_83, y = const_52_promoted)[name = string("op_3199")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131 = concat(axis = var_3197, interleave = input_131_interleave_0, values = (x_83, var_3199))[name = string("input_131")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_3194_to_fp16 = const()[name = string("op_3194_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_3194_to_fp16, x = input_131)[name = string("normed_125_cast_fp16")]; + tensor var_3204_split_sizes_0 = const()[name = string("op_3204_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3204_axis_0 = const()[name = string("op_3204_axis_0"), val = int32(-1)]; + tensor var_3204_0, tensor var_3204_1 = split(axis = var_3204_axis_0, split_sizes = var_3204_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_3204")]; + tensor var_3206 = mul(x = var_3204_0, y = layers_4_self_attn_k_norm_weight)[name = string("op_3206")]; + tensor var_3211 = const()[name = string("op_3211"), val = tensor([1, 2, 1, 256])]; + tensor q_37 = reshape(shape = var_3211, x = var_3206)[name = string("q_37")]; + fp16 var_3213_promoted = const()[name = string("op_3213_promoted"), val = fp16(0x1p+1)]; + tensor var_3182 = transpose(perm = var_3181, x = var_3176)[name = string("transpose_140")]; + tensor var_3214 = pow(x = var_3182, y = var_3213_promoted)[name = string("op_3214")]; + tensor var_3219_axes_0 = const()[name = string("op_3219_axes_0"), val = 
tensor([-1])]; + bool var_3219_keep_dims_0 = const()[name = string("op_3219_keep_dims_0"), val = bool(true)]; + tensor var_3219 = reduce_mean(axes = var_3219_axes_0, keep_dims = var_3219_keep_dims_0, x = var_3214)[name = string("op_3219")]; + fp16 var_3221_to_fp16 = const()[name = string("op_3221_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_9_cast_fp16 = add(x = var_3219, y = var_3221_to_fp16)[name = string("mean_sq_9_cast_fp16")]; + fp32 var_3223_epsilon_0 = const()[name = string("op_3223_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3223_cast_fp16 = rsqrt(epsilon = var_3223_epsilon_0, x = mean_sq_9_cast_fp16)[name = string("op_3223_cast_fp16")]; + tensor input_135_cast_fp16 = mul(x = var_3182, y = var_3223_cast_fp16)[name = string("input_135_cast_fp16")]; + tensor var_3225_cast_fp16 = mul(x = q_37, y = cos_s)[name = string("op_3225_cast_fp16")]; + tensor var_3226_split_sizes_0 = const()[name = string("op_3226_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3226_axis_0 = const()[name = string("op_3226_axis_0"), val = int32(-1)]; + tensor var_3226_0, tensor var_3226_1 = split(axis = var_3226_axis_0, split_sizes = var_3226_split_sizes_0, x = q_37)[name = string("op_3226")]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor var_3228 = mul(x = var_3226_1, y = const_53_promoted)[name = string("op_3228")]; + int32 var_3230 = const()[name = string("op_3230"), val = int32(-1)]; + bool var_3231_interleave_0 = const()[name = string("op_3231_interleave_0"), val = bool(false)]; + tensor var_3231 = concat(axis = var_3230, interleave = var_3231_interleave_0, values = (var_3228, var_3226_0))[name = string("op_3231")]; + tensor var_3232_cast_fp16 = mul(x = var_3231, y = sin_s)[name = string("op_3232_cast_fp16")]; + tensor input_133_cast_fp16 = add(x = var_3225_cast_fp16, y = var_3232_cast_fp16)[name = string("input_133_cast_fp16")]; + tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), 
val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; + fp16 const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_9_cast_fp16 = pad(constant_val = const_54_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_133_cast_fp16)[name = string("k_padded_9_cast_fp16")]; + tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; + fp16 const_55_to_fp16 = const()[name = string("const_55_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_9_cast_fp16 = pad(constant_val = const_55_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = input_135_cast_fp16)[name = string("v_padded_9_cast_fp16")]; + tensor var_3261_begin_0 = const()[name = string("op_3261_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3261_end_0 = const()[name = string("op_3261_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3261_end_mask_0 = const()[name = string("op_3261_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3261_cast_fp16 = slice_by_index(begin = var_3261_begin_0, end = var_3261_end_0, end_mask = var_3261_end_mask_0, x = K_sliding_slot_9_cast_fp16)[name = string("op_3261_cast_fp16")]; + int32 var_3268 = const()[name = string("op_3268"), val = int32(2)]; + bool K_sliding_out_9_interleave_0 = const()[name = string("K_sliding_out_9_interleave_0"), val = bool(false)]; + tensor K_sliding_out_9_cast_fp16 = concat(axis = var_3268, interleave = K_sliding_out_9_interleave_0, values = (var_3261_cast_fp16, k_padded_9_cast_fp16))[name = string("K_sliding_out_9_cast_fp16")]; + tensor var_3284_begin_0 = const()[name = string("op_3284_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3284_end_0 = const()[name = string("op_3284_end_0"), val = tensor([1, 2, 512, 512])]; + 
tensor var_3284_end_mask_0 = const()[name = string("op_3284_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3284_cast_fp16 = slice_by_index(begin = var_3284_begin_0, end = var_3284_end_0, end_mask = var_3284_end_mask_0, x = V_sliding_slot_9_cast_fp16)[name = string("op_3284_cast_fp16")]; + int32 var_3291 = const()[name = string("op_3291"), val = int32(2)]; + bool V_sliding_out_9_interleave_0 = const()[name = string("V_sliding_out_9_interleave_0"), val = bool(false)]; + tensor V_sliding_out_9_cast_fp16 = concat(axis = var_3291, interleave = V_sliding_out_9_interleave_0, values = (var_3284_cast_fp16, v_padded_9_cast_fp16))[name = string("V_sliding_out_9_cast_fp16")]; + tensor K_for_attn_9_begin_0 = const()[name = string("K_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_9_end_0 = const()[name = string("K_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_9_end_mask_0 = const()[name = string("K_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_9_cast_fp16 = slice_by_index(begin = K_for_attn_9_begin_0, end = K_for_attn_9_end_0, end_mask = K_for_attn_9_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("K_for_attn_9_cast_fp16")]; + tensor V_for_attn_9_begin_0 = const()[name = string("V_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_9_end_0 = const()[name = string("V_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_9_end_mask_0 = const()[name = string("V_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_9_cast_fp16 = slice_by_index(begin = V_for_attn_9_begin_0, end = V_for_attn_9_end_0, end_mask = V_for_attn_9_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("V_for_attn_9_cast_fp16")]; + tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([4, 1, 
1, 1])]; + tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_for_attn_9_cast_fp16)[name = string("transpose_139")]; + tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; + tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_138")]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_for_attn_9_cast_fp16)[name = string("transpose_137")]; + tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = 
reshape_18_cast_fp16)[name = string("transpose_136")]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; + tensor V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor transpose_52_cast_fp16 = transpose(perm = transpose_52_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_135")]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_39_cast_fp16, y = transpose_52_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_87_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_87_cast_fp16)[name = string("reduce_max_4")]; + tensor var_3332 = sub(x = x_87_cast_fp16, y = reduce_max_4)[name = string("op_3332")]; + tensor var_3338 = exp(x = var_3332)[name = string("op_3338")]; + tensor var_3348_axes_0 = const()[name = string("op_3348_axes_0"), val = tensor([-1])]; + bool var_3348_keep_dims_0 = const()[name = string("op_3348_keep_dims_0"), val = bool(true)]; + tensor var_3348 = reduce_sum(axes = var_3348_axes_0, keep_dims = var_3348_keep_dims_0, x = var_3338)[name = string("op_3348")]; + tensor var_3354_cast_fp16 = real_div(x = var_3338, y = var_3348)[name = string("op_3354_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = 
string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_134")]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_3354_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3365 = const()[name = string("op_3365"), val = tensor([0, 2, 1, 3])]; + tensor var_3372 = const()[name = string("op_3372"), val = tensor([1, 1, -1])]; + tensor var_3366_cast_fp16 = transpose(perm = var_3365, x = attn_output_25_cast_fp16)[name = string("transpose_133")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_3372, x = var_3366_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_3377 = const()[name = string("op_3377"), val = tensor([0, 2, 1])]; + string var_3393_pad_type_0 = const()[name = string("op_3393_pad_type_0"), val = string("valid")]; + int32 var_3393_groups_0 = const()[name = string("op_3393_groups_0"), val = int32(1)]; + tensor var_3393_strides_0 = const()[name = string("op_3393_strides_0"), val = tensor([1])]; + tensor var_3393_pad_0 = const()[name = string("op_3393_pad_0"), val = tensor([0, 0])]; + tensor var_3393_dilations_0 = const()[name = string("op_3393_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556930880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559552384))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3378_cast_fp16 = transpose(perm = var_3377, x = attn_output_27_cast_fp16)[name = string("transpose_132")]; + tensor var_3393_cast_fp16 = 
conv(dilations = var_3393_dilations_0, groups = var_3393_groups_0, pad = var_3393_pad_0, pad_type = var_3393_pad_type_0, strides = var_3393_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3378_cast_fp16)[name = string("op_3393_cast_fp16")]; + tensor var_3397 = const()[name = string("op_3397"), val = tensor([0, 2, 1])]; + int32 var_3403 = const()[name = string("op_3403"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_91_cast_fp16 = transpose(perm = var_3397, x = var_3393_cast_fp16)[name = string("transpose_131")]; + tensor var_3405_cast_fp16 = mul(x = x_91_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3405_cast_fp16")]; + bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; + tensor input_139_cast_fp16 = concat(axis = var_3403, interleave = input_139_interleave_0, values = (x_91_cast_fp16, var_3405_cast_fp16))[name = string("input_139_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3400_to_fp16, x = input_139_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_3410_split_sizes_0 = const()[name = string("op_3410_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3410_axis_0 = const()[name = string("op_3410_axis_0"), val = int32(-1)]; + tensor var_3410_cast_fp16_0, tensor var_3410_cast_fp16_1 = split(axis = var_3410_axis_0, split_sizes = var_3410_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3410_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(559555008)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_3410_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_93_cast_fp16")]; + int32 var_3419 = const()[name = string("op_3419"), val = int32(-1)]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3421_cast_fp16 = mul(x = x_93_cast_fp16, y = const_57_promoted_to_fp16)[name = string("op_3421_cast_fp16")]; + bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; + tensor input_141_cast_fp16 = concat(axis = var_3419, interleave = input_141_interleave_0, values = (x_93_cast_fp16, var_3421_cast_fp16))[name = string("input_141_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_3416_to_fp16 = const()[name = string("op_3416_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3416_to_fp16, x = input_141_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_3426_split_sizes_0 = const()[name = string("op_3426_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3426_axis_0 = const()[name = string("op_3426_axis_0"), val = int32(-1)]; + tensor var_3426_cast_fp16_0, tensor var_3426_cast_fp16_1 = split(axis = var_3426_axis_0, split_sizes = var_3426_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3426_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559560192)))]; + tensor h_27_cast_fp16 = mul(x = var_3426_cast_fp16_0, y = 
layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_3437 = const()[name = string("op_3437"), val = tensor([0, 2, 1])]; + tensor input_143_axes_0 = const()[name = string("input_143_axes_0"), val = tensor([2])]; + tensor var_3438 = transpose(perm = var_3437, x = h_27_cast_fp16)[name = string("transpose_130")]; + tensor input_143 = expand_dims(axes = input_143_axes_0, x = var_3438)[name = string("input_143")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_143)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_143)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor 
gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_145 = mul(x = gate_19, y = up_9)[name = string("input_145")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_145)[name = string("mlp_out_9")]; + tensor var_3478_axes_0 = const()[name = string("op_3478_axes_0"), val = tensor([2])]; + tensor var_3478 = squeeze(axes = var_3478_axes_0, x = mlp_out_9)[name = string("op_3478")]; + tensor var_3482 = const()[name = string("op_3482"), val = tensor([0, 2, 1])]; + int32 var_3488 = const()[name = string("op_3488"), val = int32(-1)]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor x_95 = transpose(perm = var_3482, x = var_3478)[name = string("transpose_129")]; + tensor var_3490 = mul(x = x_95, y = const_58_promoted)[name = string("op_3490")]; + bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; + tensor input_147 = concat(axis = var_3488, interleave = input_147_interleave_0, values = (x_95, var_3490))[name = string("input_147")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_3485_to_fp16 = const()[name = string("op_3485_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = 
normed_137_axes_0, epsilon = var_3485_to_fp16, x = input_147)[name = string("normed_137_cast_fp16")]; + tensor var_3495_split_sizes_0 = const()[name = string("op_3495_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3495_axis_0 = const()[name = string("op_3495_axis_0"), val = int32(-1)]; + tensor var_3495_0, tensor var_3495_1 = split(axis = var_3495_axis_0, split_sizes = var_3495_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3495")]; + tensor hidden_states_43 = mul(x = var_3495_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_93_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 1024])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 1, 1280])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_3523 = const()[name = string("op_3523"), val = tensor([0, 2, 1])]; + tensor input_149_axes_0 = const()[name = string("input_149_axes_0"), val = tensor([2])]; + tensor var_3524 = transpose(perm = var_3523, x = hidden_states_45_cast_fp16)[name = string("transpose_128")]; + tensor input_149 = expand_dims(axes = input_149_axes_0, x = var_3524)[name = string("input_149")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_149)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_3543 = const()[name = string("op_3543"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_3544_cast_fp16 = transpose(perm = var_3543, x = per_layer_slice_9_cast_fp16)[name = string("transpose_127")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_3544_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_151_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_151_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559565376))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(559893120))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_151_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_3560_axes_0 = const()[name = string("op_3560_axes_0"), val = tensor([2])]; + tensor var_3560_cast_fp16 = squeeze(axes = var_3560_axes_0, x = gated_29_cast_fp16)[name = string("op_3560_cast_fp16")]; + tensor var_3564 = const()[name = string("op_3564"), val = tensor([0, 2, 1])]; + int32 var_3570 = const()[name = string("op_3570"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_97_cast_fp16 = transpose(perm = var_3564, x = var_3560_cast_fp16)[name = string("transpose_126")]; + tensor var_3572_cast_fp16 = mul(x = x_97_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3572_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_3570, interleave = input_153_interleave_0, values = (x_97_cast_fp16, var_3572_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_3567_to_fp16 = const()[name = string("op_3567_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3567_to_fp16, x = input_153_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_3577_split_sizes_0 = const()[name = string("op_3577_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3577_axis_0 = const()[name = string("op_3577_axis_0"), val = 
int32(-1)]; + tensor var_3577_cast_fp16_0, tensor var_3577_cast_fp16_1 = split(axis = var_3577_axis_0, split_sizes = var_3577_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3577_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559895744)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_3577_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = tensor([0x1.2cp-1])]; + tensor x_99_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_60_promoted_to_fp16)[name = string("x_99_cast_fp16")]; + tensor var_3589_axes_0 = const()[name = string("op_3589_axes_0"), val = tensor([0])]; + tensor var_3589_cast_fp16 = squeeze(axes = var_3589_axes_0, x = K_sliding_out_9_cast_fp16)[name = string("op_3589_cast_fp16")]; + tensor var_3591_axes_0 = const()[name = string("op_3591_axes_0"), val = tensor([0])]; + tensor var_3591_cast_fp16 = squeeze(axes = var_3591_axes_0, x = V_sliding_out_9_cast_fp16)[name = string("op_3591_cast_fp16")]; + tensor var_3594_begin_0 = const()[name = string("op_3594_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3594_end_0 = const()[name = string("op_3594_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_3594_end_mask_0 = const()[name = string("op_3594_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3594_squeeze_mask_0 = const()[name = string("op_3594_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3594_cast_fp16 = slice_by_index(begin = var_3594_begin_0, end = 
var_3594_end_0, end_mask = var_3594_end_mask_0, squeeze_mask = var_3594_squeeze_mask_0, x = K_full_in)[name = string("op_3594_cast_fp16")]; + tensor K_full_slot_1_axes_0 = const()[name = string("K_full_slot_1_axes_0"), val = tensor([0])]; + tensor K_full_slot_1_cast_fp16 = expand_dims(axes = K_full_slot_1_axes_0, x = var_3594_cast_fp16)[name = string("K_full_slot_1_cast_fp16")]; + tensor var_3599_begin_0 = const()[name = string("op_3599_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3599_end_0 = const()[name = string("op_3599_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_3599_end_mask_0 = const()[name = string("op_3599_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3599_squeeze_mask_0 = const()[name = string("op_3599_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3599_cast_fp16 = slice_by_index(begin = var_3599_begin_0, end = var_3599_end_0, end_mask = var_3599_end_mask_0, squeeze_mask = var_3599_squeeze_mask_0, x = V_full_in)[name = string("op_3599_cast_fp16")]; + tensor V_full_slot_1_axes_0 = const()[name = string("V_full_slot_1_axes_0"), val = tensor([0])]; + tensor V_full_slot_1_cast_fp16 = expand_dims(axes = V_full_slot_1_axes_0, x = var_3599_cast_fp16)[name = string("V_full_slot_1_cast_fp16")]; + int32 var_3606 = const()[name = string("op_3606"), val = int32(-1)]; + fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3608_cast_fp16 = mul(x = x_99_cast_fp16, y = const_61_promoted_to_fp16)[name = string("op_3608_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_3606, interleave = input_155_interleave_0, values = (x_99_cast_fp16, var_3608_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_3603_to_fp16 = const()[name 
= string("op_3603_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_3603_to_fp16, x = input_155_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_3613_split_sizes_0 = const()[name = string("op_3613_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3613_axis_0 = const()[name = string("op_3613_axis_0"), val = int32(-1)]; + tensor var_3613_cast_fp16_0, tensor var_3613_cast_fp16_1 = split(axis = var_3613_axis_0, split_sizes = var_3613_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_3613_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559900928)))]; + tensor h_31_cast_fp16 = mul(x = var_3613_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_3619 = const()[name = string("op_3619"), val = tensor([0, 2, 1])]; + tensor var_3622_axes_0 = const()[name = string("op_3622_axes_0"), val = tensor([2])]; + tensor var_3620_cast_fp16 = transpose(perm = var_3619, x = h_31_cast_fp16)[name = string("transpose_125")]; + tensor var_3622_cast_fp16 = expand_dims(axes = var_3622_axes_0, x = var_3620_cast_fp16)[name = string("op_3622_cast_fp16")]; + string var_3638_pad_type_0 = const()[name = string("op_3638_pad_type_0"), val = string("valid")]; + tensor var_3638_strides_0 = const()[name = string("op_3638_strides_0"), val = tensor([1, 1])]; + tensor var_3638_pad_0 = const()[name = string("op_3638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3638_dilations_0 = const()[name = string("op_3638_dilations_0"), val = tensor([1, 1])]; + int32 var_3638_groups_0 = const()[name = string("op_3638_groups_0"), val = int32(1)]; + tensor var_3638 = conv(dilations = var_3638_dilations_0, groups = var_3638_groups_0, pad = var_3638_pad_0, pad_type = 
var_3638_pad_type_0, strides = var_3638_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_3622_cast_fp16)[name = string("op_3638")]; + tensor var_3643 = const()[name = string("op_3643"), val = tensor([1, 8, 512, 1])]; + tensor var_3644 = reshape(shape = var_3643, x = var_3638)[name = string("op_3644")]; + tensor var_3649 = const()[name = string("op_3649"), val = tensor([0, 1, 3, 2])]; + tensor var_3659 = const()[name = string("op_3659"), val = tensor([1, 8, 512])]; + tensor var_3650 = transpose(perm = var_3649, x = var_3644)[name = string("transpose_124")]; + tensor x_101 = reshape(shape = var_3659, x = var_3650)[name = string("x_101")]; + int32 var_3665 = const()[name = string("op_3665"), val = int32(-1)]; + fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; + tensor var_3667 = mul(x = x_101, y = const_62_promoted)[name = string("op_3667")]; + bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; + tensor input_159 = concat(axis = var_3665, interleave = input_159_interleave_0, values = (x_101, var_3667))[name = string("input_159")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_3662_to_fp16 = const()[name = string("op_3662_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_3662_to_fp16, x = input_159)[name = string("normed_149_cast_fp16")]; + tensor var_3672_split_sizes_0 = const()[name = string("op_3672_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3672_axis_0 = const()[name = string("op_3672_axis_0"), val = int32(-1)]; + tensor var_3672_0, tensor var_3672_1 = split(axis = var_3672_axis_0, split_sizes = var_3672_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_3672")]; + tensor var_3674 = mul(x = var_3672_0, y = layers_5_self_attn_q_norm_weight)[name = string("op_3674")]; + tensor var_3679 = const()[name = 
string("op_3679"), val = tensor([1, 8, 1, 512])]; + tensor q_43 = reshape(shape = var_3679, x = var_3674)[name = string("q_43")]; + tensor var_3681_cast_fp16 = mul(x = q_43, y = cos_f)[name = string("op_3681_cast_fp16")]; + tensor var_3682_split_sizes_0 = const()[name = string("op_3682_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3682_axis_0 = const()[name = string("op_3682_axis_0"), val = int32(-1)]; + tensor var_3682_0, tensor var_3682_1 = split(axis = var_3682_axis_0, split_sizes = var_3682_split_sizes_0, x = q_43)[name = string("op_3682")]; + fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; + tensor var_3684 = mul(x = var_3682_1, y = const_63_promoted)[name = string("op_3684")]; + int32 var_3686 = const()[name = string("op_3686"), val = int32(-1)]; + bool var_3687_interleave_0 = const()[name = string("op_3687_interleave_0"), val = bool(false)]; + tensor var_3687 = concat(axis = var_3686, interleave = var_3687_interleave_0, values = (var_3684, var_3682_0))[name = string("op_3687")]; + tensor var_3688_cast_fp16 = mul(x = var_3687, y = sin_f)[name = string("op_3688_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_3681_cast_fp16, y = var_3688_cast_fp16)[name = string("q_47_cast_fp16")]; + string var_3701_pad_type_0 = const()[name = string("op_3701_pad_type_0"), val = string("valid")]; + tensor var_3701_strides_0 = const()[name = string("op_3701_strides_0"), val = tensor([1, 1])]; + tensor var_3701_pad_0 = const()[name = string("op_3701_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3701_dilations_0 = const()[name = string("op_3701_dilations_0"), val = tensor([1, 1])]; + int32 var_3701_groups_0 = const()[name = string("op_3701_groups_0"), val = int32(1)]; + tensor var_3701 = conv(dilations = var_3701_dilations_0, groups = var_3701_groups_0, pad = var_3701_pad_0, pad_type = var_3701_pad_type_0, strides = var_3701_strides_0, weight = layers_5_self_attn_k_proj_weight_palettized, x = var_3622_cast_fp16)[name = 
string("op_3701")]; + tensor var_3706 = const()[name = string("op_3706"), val = tensor([1, 2, 512, 1])]; + tensor var_3707 = reshape(shape = var_3706, x = var_3701)[name = string("op_3707")]; + tensor var_3712 = const()[name = string("op_3712"), val = tensor([0, 1, 3, 2])]; + string var_3729_pad_type_0 = const()[name = string("op_3729_pad_type_0"), val = string("valid")]; + tensor var_3729_strides_0 = const()[name = string("op_3729_strides_0"), val = tensor([1, 1])]; + tensor var_3729_pad_0 = const()[name = string("op_3729_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3729_dilations_0 = const()[name = string("op_3729_dilations_0"), val = tensor([1, 1])]; + int32 var_3729_groups_0 = const()[name = string("op_3729_groups_0"), val = int32(1)]; + tensor var_3729 = conv(dilations = var_3729_dilations_0, groups = var_3729_groups_0, pad = var_3729_pad_0, pad_type = var_3729_pad_type_0, strides = var_3729_strides_0, weight = layers_5_self_attn_v_proj_weight_palettized, x = var_3622_cast_fp16)[name = string("op_3729")]; + tensor var_3734 = const()[name = string("op_3734"), val = tensor([1, 2, 512, 1])]; + tensor var_3735 = reshape(shape = var_3734, x = var_3729)[name = string("op_3735")]; + tensor var_3740 = const()[name = string("op_3740"), val = tensor([0, 1, 3, 2])]; + tensor var_3750 = const()[name = string("op_3750"), val = tensor([1, 2, 512])]; + tensor var_3713 = transpose(perm = var_3712, x = var_3707)[name = string("transpose_123")]; + tensor x_103 = reshape(shape = var_3750, x = var_3713)[name = string("x_103")]; + int32 var_3756 = const()[name = string("op_3756"), val = int32(-1)]; + fp16 const_64_promoted = const()[name = string("const_64_promoted"), val = fp16(-0x1p+0)]; + tensor var_3758 = mul(x = x_103, y = const_64_promoted)[name = string("op_3758")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_3756, interleave = input_161_interleave_0, values = (x_103, 
var_3758))[name = string("input_161")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_3753_to_fp16 = const()[name = string("op_3753_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_3753_to_fp16, x = input_161)[name = string("normed_153_cast_fp16")]; + tensor var_3763_split_sizes_0 = const()[name = string("op_3763_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3763_axis_0 = const()[name = string("op_3763_axis_0"), val = int32(-1)]; + tensor var_3763_0, tensor var_3763_1 = split(axis = var_3763_axis_0, split_sizes = var_3763_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_3763")]; + tensor var_3765 = mul(x = var_3763_0, y = layers_5_self_attn_k_norm_weight)[name = string("op_3765")]; + tensor var_3770 = const()[name = string("op_3770"), val = tensor([1, 2, 1, 512])]; + tensor q_45 = reshape(shape = var_3770, x = var_3765)[name = string("q_45")]; + fp16 var_3772_promoted = const()[name = string("op_3772_promoted"), val = fp16(0x1p+1)]; + tensor var_3741 = transpose(perm = var_3740, x = var_3735)[name = string("transpose_122")]; + tensor var_3773 = pow(x = var_3741, y = var_3772_promoted)[name = string("op_3773")]; + tensor var_3778_axes_0 = const()[name = string("op_3778_axes_0"), val = tensor([-1])]; + bool var_3778_keep_dims_0 = const()[name = string("op_3778_keep_dims_0"), val = bool(true)]; + tensor var_3778 = reduce_mean(axes = var_3778_axes_0, keep_dims = var_3778_keep_dims_0, x = var_3773)[name = string("op_3778")]; + fp16 var_3780_to_fp16 = const()[name = string("op_3780_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_11_cast_fp16 = add(x = var_3778, y = var_3780_to_fp16)[name = string("mean_sq_11_cast_fp16")]; + fp32 var_3782_epsilon_0 = const()[name = string("op_3782_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3782_cast_fp16 = rsqrt(epsilon = var_3782_epsilon_0, x = mean_sq_11_cast_fp16)[name = 
string("op_3782_cast_fp16")]; + tensor v_1_cast_fp16 = mul(x = var_3741, y = var_3782_cast_fp16)[name = string("v_1_cast_fp16")]; + tensor var_3784_cast_fp16 = mul(x = q_45, y = cos_f)[name = string("op_3784_cast_fp16")]; + tensor var_3785_split_sizes_0 = const()[name = string("op_3785_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3785_axis_0 = const()[name = string("op_3785_axis_0"), val = int32(-1)]; + tensor var_3785_0, tensor var_3785_1 = split(axis = var_3785_axis_0, split_sizes = var_3785_split_sizes_0, x = q_45)[name = string("op_3785")]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_3787 = mul(x = var_3785_1, y = const_65_promoted)[name = string("op_3787")]; + int32 var_3789 = const()[name = string("op_3789"), val = int32(-1)]; + bool var_3790_interleave_0 = const()[name = string("op_3790_interleave_0"), val = bool(false)]; + tensor var_3790 = concat(axis = var_3789, interleave = var_3790_interleave_0, values = (var_3787, var_3785_0))[name = string("op_3790")]; + tensor var_3791_cast_fp16 = mul(x = var_3790, y = sin_f)[name = string("op_3791_cast_fp16")]; + tensor k_13_cast_fp16 = add(x = var_3784_cast_fp16, y = var_3791_cast_fp16)[name = string("k_13_cast_fp16")]; + fp16 var_3794_promoted_to_fp16 = const()[name = string("op_3794_promoted_to_fp16"), val = fp16(0x1p+0)]; + tensor var_3796_cast_fp16 = sub(x = var_3794_promoted_to_fp16, y = update_mask)[name = string("op_3796_cast_fp16")]; + tensor var_3797_cast_fp16 = mul(x = K_full_slot_1_cast_fp16, y = var_3796_cast_fp16)[name = string("op_3797_cast_fp16")]; + tensor var_3798_reps_0 = const()[name = string("op_3798_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_3798_cast_fp16 = tile(reps = var_3798_reps_0, x = k_13_cast_fp16)[name = string("op_3798_cast_fp16")]; + tensor var_3799_cast_fp16 = mul(x = var_3798_cast_fp16, y = update_mask)[name = string("op_3799_cast_fp16")]; + tensor K_full_out_1_cast_fp16 = add(x = 
var_3797_cast_fp16, y = var_3799_cast_fp16)[name = string("K_full_out_1_cast_fp16")]; + tensor var_3805_cast_fp16 = mul(x = V_full_slot_1_cast_fp16, y = var_3796_cast_fp16)[name = string("op_3805_cast_fp16")]; + tensor var_3806_reps_0 = const()[name = string("op_3806_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_3806_cast_fp16 = tile(reps = var_3806_reps_0, x = v_1_cast_fp16)[name = string("op_3806_cast_fp16")]; + tensor var_3807_cast_fp16 = mul(x = var_3806_cast_fp16, y = update_mask)[name = string("op_3807_cast_fp16")]; + tensor V_full_out_1_cast_fp16 = add(x = var_3805_cast_fp16, y = var_3807_cast_fp16)[name = string("V_full_out_1_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = K_full_out_1_cast_fp16)[name = string("transpose_121")]; + tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_20, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_120")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_21, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 
0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = V_full_out_1_cast_fp16)[name = string("transpose_119")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_22, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_118")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_23, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_117")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_47_cast_fp16, y = transpose_53_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_107_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name 
= string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_107_cast_fp16)[name = string("reduce_max_5")]; + tensor var_3849 = sub(x = x_107_cast_fp16, y = reduce_max_5)[name = string("op_3849")]; + tensor var_3855 = exp(x = var_3849)[name = string("op_3855")]; + tensor var_3865_axes_0 = const()[name = string("op_3865_axes_0"), val = tensor([-1])]; + bool var_3865_keep_dims_0 = const()[name = string("op_3865_keep_dims_0"), val = bool(true)]; + tensor var_3865 = reduce_sum(axes = var_3865_axes_0, keep_dims = var_3865_keep_dims_0, x = var_3855)[name = string("op_3865")]; + tensor var_3871_cast_fp16 = real_div(x = var_3855, y = var_3865)[name = string("op_3871_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_116")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_3871_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_3882 = const()[name = string("op_3882"), val = tensor([0, 2, 1, 3])]; + tensor var_3889 = const()[name = string("op_3889"), val = tensor([1, 1, -1])]; + tensor var_3883_cast_fp16 = transpose(perm = var_3882, x = attn_output_31_cast_fp16)[name = string("transpose_115")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_3889, x = var_3883_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_3894 = const()[name = string("op_3894"), val = tensor([0, 2, 1])]; + string var_3910_pad_type_0 = const()[name = string("op_3910_pad_type_0"), val = string("valid")]; + int32 var_3910_groups_0 = 
const()[name = string("op_3910_groups_0"), val = int32(1)]; + tensor var_3910_strides_0 = const()[name = string("op_3910_strides_0"), val = tensor([1])]; + tensor var_3910_pad_0 = const()[name = string("op_3910_pad_0"), val = tensor([0, 0])]; + tensor var_3910_dilations_0 = const()[name = string("op_3910_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559906112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565149056))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3895_cast_fp16 = transpose(perm = var_3894, x = attn_output_33_cast_fp16)[name = string("transpose_114")]; + tensor var_3910_cast_fp16 = conv(dilations = var_3910_dilations_0, groups = var_3910_groups_0, pad = var_3910_pad_0, pad_type = var_3910_pad_type_0, strides = var_3910_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_3895_cast_fp16)[name = string("op_3910_cast_fp16")]; + tensor var_3914 = const()[name = string("op_3914"), val = tensor([0, 2, 1])]; + int32 var_3920 = const()[name = string("op_3920"), val = int32(-1)]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_111_cast_fp16 = transpose(perm = var_3914, x = var_3910_cast_fp16)[name = string("transpose_113")]; + tensor var_3922_cast_fp16 = mul(x = x_111_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_3922_cast_fp16")]; + bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; + tensor input_165_cast_fp16 = concat(axis = var_3920, interleave = input_165_interleave_0, values = (x_111_cast_fp16, var_3922_cast_fp16))[name = string("input_165_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 
var_3917_to_fp16 = const()[name = string("op_3917_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_3917_to_fp16, x = input_165_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_3927_split_sizes_0 = const()[name = string("op_3927_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3927_axis_0 = const()[name = string("op_3927_axis_0"), val = int32(-1)]; + tensor var_3927_cast_fp16_0, tensor var_3927_cast_fp16_1 = split(axis = var_3927_axis_0, split_sizes = var_3927_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_3927_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565151680)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_3927_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_113_cast_fp16 = add(x = x_99_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_113_cast_fp16")]; + int32 var_3936 = const()[name = string("op_3936"), val = int32(-1)]; + fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3938_cast_fp16 = mul(x = x_113_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3938_cast_fp16")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_3936, interleave = input_167_interleave_0, values = (x_113_cast_fp16, var_3938_cast_fp16))[name = string("input_167_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_3933_to_fp16 = const()[name = string("op_3933_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = 
normed_161_axes_0, epsilon = var_3933_to_fp16, x = input_167_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor var_3943_split_sizes_0 = const()[name = string("op_3943_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3943_axis_0 = const()[name = string("op_3943_axis_0"), val = int32(-1)]; + tensor var_3943_cast_fp16_0, tensor var_3943_cast_fp16_1 = split(axis = var_3943_axis_0, split_sizes = var_3943_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3943_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565156864)))]; + tensor h_33_cast_fp16 = mul(x = var_3943_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_3954 = const()[name = string("op_3954"), val = tensor([0, 2, 1])]; + tensor input_169_axes_0 = const()[name = string("input_169_axes_0"), val = tensor([2])]; + tensor var_3955 = transpose(perm = var_3954, x = h_33_cast_fp16)[name = string("transpose_112")]; + tensor input_169 = expand_dims(axes = input_169_axes_0, x = var_3955)[name = string("input_169")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_169)[name = 
string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_169)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_171 = mul(x = gate_23, y = up_11)[name = string("input_171")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_171)[name = string("mlp_out_11")]; + tensor var_3995_axes_0 = const()[name = string("op_3995_axes_0"), val = tensor([2])]; + tensor var_3995 = squeeze(axes = var_3995_axes_0, x = mlp_out_11)[name = string("op_3995")]; + tensor var_3999 = const()[name = 
string("op_3999"), val = tensor([0, 2, 1])]; + int32 var_4005 = const()[name = string("op_4005"), val = int32(-1)]; + fp16 const_68_promoted = const()[name = string("const_68_promoted"), val = fp16(-0x1p+0)]; + tensor x_115 = transpose(perm = var_3999, x = var_3995)[name = string("transpose_111")]; + tensor var_4007 = mul(x = x_115, y = const_68_promoted)[name = string("op_4007")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_4005, interleave = input_173_interleave_0, values = (x_115, var_4007))[name = string("input_173")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_4002_to_fp16 = const()[name = string("op_4002_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_4002_to_fp16, x = input_173)[name = string("normed_165_cast_fp16")]; + tensor var_4012_split_sizes_0 = const()[name = string("op_4012_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4012_axis_0 = const()[name = string("op_4012_axis_0"), val = int32(-1)]; + tensor var_4012_0, tensor var_4012_1 = split(axis = var_4012_axis_0, split_sizes = var_4012_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_4012")]; + tensor hidden_states_53 = mul(x = var_4012_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_113_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 1280])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 1536])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = 
slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_4040 = const()[name = string("op_4040"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_4041 = transpose(perm = var_4040, x = hidden_states_55_cast_fp16)[name = string("transpose_110")]; + tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_4041)[name = string("input_175")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_175)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_4060 = const()[name = string("op_4060"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_4061_cast_fp16 = transpose(perm = var_4060, x = per_layer_slice_11_cast_fp16)[name = string("transpose_109")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_4061_cast_fp16)[name = 
string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_177_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_177_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565162048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565489792))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_4077_axes_0 = const()[name = string("op_4077_axes_0"), val = tensor([2])]; + tensor var_4077_cast_fp16 = squeeze(axes = var_4077_axes_0, x = gated_35_cast_fp16)[name = string("op_4077_cast_fp16")]; + tensor var_4081 = const()[name = string("op_4081"), val = tensor([0, 2, 1])]; + int32 var_4087 = const()[name = string("op_4087"), val = int32(-1)]; + fp16 const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_117_cast_fp16 = transpose(perm = var_4081, x = var_4077_cast_fp16)[name = string("transpose_108")]; + tensor var_4089_cast_fp16 = mul(x = 
x_117_cast_fp16, y = const_69_promoted_to_fp16)[name = string("op_4089_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_4087, interleave = input_179_interleave_0, values = (x_117_cast_fp16, var_4089_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_4084_to_fp16 = const()[name = string("op_4084_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_4084_to_fp16, x = input_179_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_4094_split_sizes_0 = const()[name = string("op_4094_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4094_axis_0 = const()[name = string("op_4094_axis_0"), val = int32(-1)]; + tensor var_4094_cast_fp16_0, tensor var_4094_cast_fp16_1 = split(axis = var_4094_axis_0, split_sizes = var_4094_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_4094_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565492416)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_4094_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = tensor([0x1.36p-1])]; + tensor x_119_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_70_promoted_to_fp16)[name = string("x_119_cast_fp16")]; + tensor var_4106_axes_0 = const()[name = 
string("op_4106_axes_0"), val = tensor([0])]; + tensor var_4106_cast_fp16 = squeeze(axes = var_4106_axes_0, x = K_full_out_1_cast_fp16)[name = string("op_4106_cast_fp16")]; + tensor var_4108_axes_0 = const()[name = string("op_4108_axes_0"), val = tensor([0])]; + tensor var_4108_cast_fp16 = squeeze(axes = var_4108_axes_0, x = V_full_out_1_cast_fp16)[name = string("op_4108_cast_fp16")]; + tensor var_4111_begin_0 = const()[name = string("op_4111_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4111_end_0 = const()[name = string("op_4111_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4111_end_mask_0 = const()[name = string("op_4111_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4111_squeeze_mask_0 = const()[name = string("op_4111_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4111_cast_fp16 = slice_by_index(begin = var_4111_begin_0, end = var_4111_end_0, end_mask = var_4111_end_mask_0, squeeze_mask = var_4111_squeeze_mask_0, x = K_sliding_in)[name = string("op_4111_cast_fp16")]; + tensor K_sliding_slot_11_axes_0 = const()[name = string("K_sliding_slot_11_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_11_cast_fp16 = expand_dims(axes = K_sliding_slot_11_axes_0, x = var_4111_cast_fp16)[name = string("K_sliding_slot_11_cast_fp16")]; + tensor var_4116_begin_0 = const()[name = string("op_4116_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4116_end_0 = const()[name = string("op_4116_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4116_end_mask_0 = const()[name = string("op_4116_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4116_squeeze_mask_0 = const()[name = string("op_4116_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4116_cast_fp16 = slice_by_index(begin = var_4116_begin_0, end = var_4116_end_0, end_mask = var_4116_end_mask_0, squeeze_mask = var_4116_squeeze_mask_0, x = V_sliding_in)[name = string("op_4116_cast_fp16")]; + tensor 
V_sliding_slot_11_axes_0 = const()[name = string("V_sliding_slot_11_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_11_cast_fp16 = expand_dims(axes = V_sliding_slot_11_axes_0, x = var_4116_cast_fp16)[name = string("V_sliding_slot_11_cast_fp16")]; + int32 var_4123 = const()[name = string("op_4123"), val = int32(-1)]; + fp16 const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4125_cast_fp16 = mul(x = x_119_cast_fp16, y = const_71_promoted_to_fp16)[name = string("op_4125_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_4123, interleave = input_181_interleave_0, values = (x_119_cast_fp16, var_4125_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_4120_to_fp16 = const()[name = string("op_4120_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_4120_to_fp16, x = input_181_cast_fp16)[name = string("normed_173_cast_fp16")]; + tensor var_4130_split_sizes_0 = const()[name = string("op_4130_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4130_axis_0 = const()[name = string("op_4130_axis_0"), val = int32(-1)]; + tensor var_4130_cast_fp16_0, tensor var_4130_cast_fp16_1 = split(axis = var_4130_axis_0, split_sizes = var_4130_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_4130_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565497600)))]; + tensor h_37_cast_fp16 = mul(x = var_4130_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_4136 = const()[name = 
string("op_4136"), val = tensor([0, 2, 1])]; + tensor var_4139_axes_0 = const()[name = string("op_4139_axes_0"), val = tensor([2])]; + tensor var_4137_cast_fp16 = transpose(perm = var_4136, x = h_37_cast_fp16)[name = string("transpose_107")]; + tensor var_4139_cast_fp16 = expand_dims(axes = var_4139_axes_0, x = var_4137_cast_fp16)[name = string("op_4139_cast_fp16")]; + string var_4155_pad_type_0 = const()[name = string("op_4155_pad_type_0"), val = string("valid")]; + tensor var_4155_strides_0 = const()[name = string("op_4155_strides_0"), val = tensor([1, 1])]; + tensor var_4155_pad_0 = const()[name = string("op_4155_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4155_dilations_0 = const()[name = string("op_4155_dilations_0"), val = tensor([1, 1])]; + int32 var_4155_groups_0 = const()[name = string("op_4155_groups_0"), val = int32(1)]; + tensor var_4155 = conv(dilations = var_4155_dilations_0, groups = var_4155_groups_0, pad = var_4155_pad_0, pad_type = var_4155_pad_type_0, strides = var_4155_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_4139_cast_fp16)[name = string("op_4155")]; + tensor var_4160 = const()[name = string("op_4160"), val = tensor([1, 8, 256, 1])]; + tensor var_4161 = reshape(shape = var_4160, x = var_4155)[name = string("op_4161")]; + tensor var_4166 = const()[name = string("op_4166"), val = tensor([0, 1, 3, 2])]; + tensor var_4176 = const()[name = string("op_4176"), val = tensor([1, 8, 256])]; + tensor var_4167 = transpose(perm = var_4166, x = var_4161)[name = string("transpose_106")]; + tensor x_121 = reshape(shape = var_4176, x = var_4167)[name = string("x_121")]; + int32 var_4182 = const()[name = string("op_4182"), val = int32(-1)]; + fp16 const_72_promoted = const()[name = string("const_72_promoted"), val = fp16(-0x1p+0)]; + tensor var_4184 = mul(x = x_121, y = const_72_promoted)[name = string("op_4184")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor 
input_185 = concat(axis = var_4182, interleave = input_185_interleave_0, values = (x_121, var_4184))[name = string("input_185")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_4179_to_fp16 = const()[name = string("op_4179_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_4179_to_fp16, x = input_185)[name = string("normed_177_cast_fp16")]; + tensor var_4189_split_sizes_0 = const()[name = string("op_4189_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4189_axis_0 = const()[name = string("op_4189_axis_0"), val = int32(-1)]; + tensor var_4189_0, tensor var_4189_1 = split(axis = var_4189_axis_0, split_sizes = var_4189_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_4189")]; + tensor var_4191 = mul(x = var_4189_0, y = layers_3_self_attn_q_norm_weight)[name = string("op_4191")]; + tensor var_4196 = const()[name = string("op_4196"), val = tensor([1, 8, 1, 256])]; + tensor q_51 = reshape(shape = var_4196, x = var_4191)[name = string("q_51")]; + tensor var_4198_cast_fp16 = mul(x = q_51, y = cos_s)[name = string("op_4198_cast_fp16")]; + tensor var_4199_split_sizes_0 = const()[name = string("op_4199_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4199_axis_0 = const()[name = string("op_4199_axis_0"), val = int32(-1)]; + tensor var_4199_0, tensor var_4199_1 = split(axis = var_4199_axis_0, split_sizes = var_4199_split_sizes_0, x = q_51)[name = string("op_4199")]; + fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; + tensor var_4201 = mul(x = var_4199_1, y = const_73_promoted)[name = string("op_4201")]; + int32 var_4203 = const()[name = string("op_4203"), val = int32(-1)]; + bool var_4204_interleave_0 = const()[name = string("op_4204_interleave_0"), val = bool(false)]; + tensor var_4204 = concat(axis = var_4203, interleave = var_4204_interleave_0, values = (var_4201, var_4199_0))[name = 
string("op_4204")]; + tensor var_4205_cast_fp16 = mul(x = var_4204, y = sin_s)[name = string("op_4205_cast_fp16")]; + tensor q_55_cast_fp16 = add(x = var_4198_cast_fp16, y = var_4205_cast_fp16)[name = string("q_55_cast_fp16")]; + string var_4218_pad_type_0 = const()[name = string("op_4218_pad_type_0"), val = string("valid")]; + tensor var_4218_strides_0 = const()[name = string("op_4218_strides_0"), val = tensor([1, 1])]; + tensor var_4218_pad_0 = const()[name = string("op_4218_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4218_dilations_0 = const()[name = string("op_4218_dilations_0"), val = tensor([1, 1])]; + int32 var_4218_groups_0 = const()[name = string("op_4218_groups_0"), val = int32(1)]; + tensor var_4218 = conv(dilations = var_4218_dilations_0, groups = var_4218_groups_0, pad = var_4218_pad_0, pad_type = var_4218_pad_type_0, strides = var_4218_strides_0, weight = layers_6_self_attn_k_proj_weight_palettized, x = var_4139_cast_fp16)[name = string("op_4218")]; + tensor var_4223 = const()[name = string("op_4223"), val = tensor([1, 2, 256, 1])]; + tensor var_4224 = reshape(shape = var_4223, x = var_4218)[name = string("op_4224")]; + tensor var_4229 = const()[name = string("op_4229"), val = tensor([0, 1, 3, 2])]; + string var_4246_pad_type_0 = const()[name = string("op_4246_pad_type_0"), val = string("valid")]; + tensor var_4246_strides_0 = const()[name = string("op_4246_strides_0"), val = tensor([1, 1])]; + tensor var_4246_pad_0 = const()[name = string("op_4246_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4246_dilations_0 = const()[name = string("op_4246_dilations_0"), val = tensor([1, 1])]; + int32 var_4246_groups_0 = const()[name = string("op_4246_groups_0"), val = int32(1)]; + tensor var_4246 = conv(dilations = var_4246_dilations_0, groups = var_4246_groups_0, pad = var_4246_pad_0, pad_type = var_4246_pad_type_0, strides = var_4246_strides_0, weight = layers_6_self_attn_v_proj_weight_palettized, x = var_4139_cast_fp16)[name = string("op_4246")]; + 
tensor var_4251 = const()[name = string("op_4251"), val = tensor([1, 2, 256, 1])]; + tensor var_4252 = reshape(shape = var_4251, x = var_4246)[name = string("op_4252")]; + tensor var_4257 = const()[name = string("op_4257"), val = tensor([0, 1, 3, 2])]; + tensor var_4267 = const()[name = string("op_4267"), val = tensor([1, 2, 256])]; + tensor var_4230 = transpose(perm = var_4229, x = var_4224)[name = string("transpose_105")]; + tensor x_123 = reshape(shape = var_4267, x = var_4230)[name = string("x_123")]; + int32 var_4273 = const()[name = string("op_4273"), val = int32(-1)]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor var_4275 = mul(x = x_123, y = const_74_promoted)[name = string("op_4275")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_4273, interleave = input_187_interleave_0, values = (x_123, var_4275))[name = string("input_187")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_4270_to_fp16 = const()[name = string("op_4270_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_4270_to_fp16, x = input_187)[name = string("normed_181_cast_fp16")]; + tensor var_4280_split_sizes_0 = const()[name = string("op_4280_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4280_axis_0 = const()[name = string("op_4280_axis_0"), val = int32(-1)]; + tensor var_4280_0, tensor var_4280_1 = split(axis = var_4280_axis_0, split_sizes = var_4280_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_4280")]; + tensor var_4282 = mul(x = var_4280_0, y = layers_6_self_attn_k_norm_weight)[name = string("op_4282")]; + tensor var_4287 = const()[name = string("op_4287"), val = tensor([1, 2, 1, 256])]; + tensor q_53 = reshape(shape = var_4287, x = var_4282)[name = string("q_53")]; + fp16 var_4289_promoted = 
const()[name = string("op_4289_promoted"), val = fp16(0x1p+1)]; + tensor var_4258 = transpose(perm = var_4257, x = var_4252)[name = string("transpose_104")]; + tensor var_4290 = pow(x = var_4258, y = var_4289_promoted)[name = string("op_4290")]; + tensor var_4295_axes_0 = const()[name = string("op_4295_axes_0"), val = tensor([-1])]; + bool var_4295_keep_dims_0 = const()[name = string("op_4295_keep_dims_0"), val = bool(true)]; + tensor var_4295 = reduce_mean(axes = var_4295_axes_0, keep_dims = var_4295_keep_dims_0, x = var_4290)[name = string("op_4295")]; + fp16 var_4297_to_fp16 = const()[name = string("op_4297_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_13_cast_fp16 = add(x = var_4295, y = var_4297_to_fp16)[name = string("mean_sq_13_cast_fp16")]; + fp32 var_4299_epsilon_0 = const()[name = string("op_4299_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4299_cast_fp16 = rsqrt(epsilon = var_4299_epsilon_0, x = mean_sq_13_cast_fp16)[name = string("op_4299_cast_fp16")]; + tensor input_191_cast_fp16 = mul(x = var_4258, y = var_4299_cast_fp16)[name = string("input_191_cast_fp16")]; + tensor var_4301_cast_fp16 = mul(x = q_53, y = cos_s)[name = string("op_4301_cast_fp16")]; + tensor var_4302_split_sizes_0 = const()[name = string("op_4302_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4302_axis_0 = const()[name = string("op_4302_axis_0"), val = int32(-1)]; + tensor var_4302_0, tensor var_4302_1 = split(axis = var_4302_axis_0, split_sizes = var_4302_split_sizes_0, x = q_53)[name = string("op_4302")]; + fp16 const_75_promoted = const()[name = string("const_75_promoted"), val = fp16(-0x1p+0)]; + tensor var_4304 = mul(x = var_4302_1, y = const_75_promoted)[name = string("op_4304")]; + int32 var_4306 = const()[name = string("op_4306"), val = int32(-1)]; + bool var_4307_interleave_0 = const()[name = string("op_4307_interleave_0"), val = bool(false)]; + tensor var_4307 = concat(axis = var_4306, interleave = var_4307_interleave_0, values = (var_4304, 
var_4302_0))[name = string("op_4307")]; + tensor var_4308_cast_fp16 = mul(x = var_4307, y = sin_s)[name = string("op_4308_cast_fp16")]; + tensor input_189_cast_fp16 = add(x = var_4301_cast_fp16, y = var_4308_cast_fp16)[name = string("input_189_cast_fp16")]; + tensor k_padded_11_pad_0 = const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; + fp16 const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_11_cast_fp16 = pad(constant_val = const_76_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_189_cast_fp16)[name = string("k_padded_11_cast_fp16")]; + tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; + fp16 const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_11_cast_fp16 = pad(constant_val = const_77_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = input_191_cast_fp16)[name = string("v_padded_11_cast_fp16")]; + tensor var_4337_begin_0 = const()[name = string("op_4337_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4337_end_0 = const()[name = string("op_4337_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4337_end_mask_0 = const()[name = string("op_4337_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4337_cast_fp16 = slice_by_index(begin = var_4337_begin_0, end = var_4337_end_0, end_mask = var_4337_end_mask_0, x = K_sliding_slot_11_cast_fp16)[name = string("op_4337_cast_fp16")]; + int32 var_4344 = const()[name = string("op_4344"), val = int32(2)]; + bool K_sliding_out_11_interleave_0 = const()[name = string("K_sliding_out_11_interleave_0"), val = bool(false)]; + tensor K_sliding_out_11_cast_fp16 = concat(axis = 
var_4344, interleave = K_sliding_out_11_interleave_0, values = (var_4337_cast_fp16, k_padded_11_cast_fp16))[name = string("K_sliding_out_11_cast_fp16")]; + tensor var_4360_begin_0 = const()[name = string("op_4360_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4360_end_0 = const()[name = string("op_4360_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4360_end_mask_0 = const()[name = string("op_4360_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4360_cast_fp16 = slice_by_index(begin = var_4360_begin_0, end = var_4360_end_0, end_mask = var_4360_end_mask_0, x = V_sliding_slot_11_cast_fp16)[name = string("op_4360_cast_fp16")]; + int32 var_4367 = const()[name = string("op_4367"), val = int32(2)]; + bool V_sliding_out_11_interleave_0 = const()[name = string("V_sliding_out_11_interleave_0"), val = bool(false)]; + tensor V_sliding_out_11_cast_fp16 = concat(axis = var_4367, interleave = V_sliding_out_11_interleave_0, values = (var_4360_cast_fp16, v_padded_11_cast_fp16))[name = string("V_sliding_out_11_cast_fp16")]; + tensor K_for_attn_13_begin_0 = const()[name = string("K_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_13_end_0 = const()[name = string("K_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_13_end_mask_0 = const()[name = string("K_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_13_cast_fp16 = slice_by_index(begin = K_for_attn_13_begin_0, end = K_for_attn_13_end_0, end_mask = K_for_attn_13_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("K_for_attn_13_cast_fp16")]; + tensor V_for_attn_13_begin_0 = const()[name = string("V_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_13_end_0 = const()[name = string("V_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_13_end_mask_0 = const()[name = string("V_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
V_for_attn_13_cast_fp16 = slice_by_index(begin = V_for_attn_13_begin_0, end = V_for_attn_13_end_0, end_mask = V_for_attn_13_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("V_for_attn_13_cast_fp16")]; + tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_12_reps_0 = const()[name = string("tile_12_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = K_for_attn_13_cast_fp16)[name = string("transpose_103")]; + tensor tile_12_cast_fp16 = tile(reps = tile_12_reps_0, x = transpose_24_cast_fp16)[name = string("tile_12_cast_fp16")]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_24, x = tile_12_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_24_cast_fp16)[name = string("transpose_102")]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_25, x = transpose_25_cast_fp16)[name = string("reshape_25_cast_fp16")]; + tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_13_reps_0 = const()[name = string("tile_13_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = V_for_attn_13_cast_fp16)[name = string("transpose_101")]; + tensor tile_13_cast_fp16 = tile(reps = tile_13_reps_0, x = transpose_26_cast_fp16)[name = string("tile_13_cast_fp16")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([4, 2, 1, 512, 
256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_26, x = tile_13_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_100")]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_27, x = transpose_27_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor V_expanded_13_perm_0 = const()[name = string("V_expanded_13_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor transpose_54_cast_fp16 = transpose(perm = transpose_54_perm_0, x = reshape_25_cast_fp16)[name = string("transpose_99")]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_55_cast_fp16, y = transpose_54_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_127_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_127_cast_fp16)[name = string("reduce_max_6")]; + tensor var_4408 = sub(x = x_127_cast_fp16, y = reduce_max_6)[name = string("op_4408")]; + tensor var_4414 = exp(x = var_4408)[name = string("op_4414")]; + tensor var_4424_axes_0 = const()[name = string("op_4424_axes_0"), val = 
tensor([-1])]; + bool var_4424_keep_dims_0 = const()[name = string("op_4424_keep_dims_0"), val = bool(true)]; + tensor var_4424 = reduce_sum(axes = var_4424_axes_0, keep_dims = var_4424_keep_dims_0, x = var_4414)[name = string("op_4424")]; + tensor var_4430_cast_fp16 = real_div(x = var_4414, y = var_4424)[name = string("op_4430_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor V_expanded_13_cast_fp16 = transpose(perm = V_expanded_13_perm_0, x = reshape_27_cast_fp16)[name = string("transpose_98")]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_4430_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_4441 = const()[name = string("op_4441"), val = tensor([0, 2, 1, 3])]; + tensor var_4448 = const()[name = string("op_4448"), val = tensor([1, 1, -1])]; + tensor var_4442_cast_fp16 = transpose(perm = var_4441, x = attn_output_37_cast_fp16)[name = string("transpose_97")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_4448, x = var_4442_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_4453 = const()[name = string("op_4453"), val = tensor([0, 2, 1])]; + string var_4469_pad_type_0 = const()[name = string("op_4469_pad_type_0"), val = string("valid")]; + int32 var_4469_groups_0 = const()[name = string("op_4469_groups_0"), val = int32(1)]; + tensor var_4469_strides_0 = const()[name = string("op_4469_strides_0"), val = tensor([1])]; + tensor var_4469_pad_0 = const()[name = string("op_4469_pad_0"), val = tensor([0, 0])]; + tensor var_4469_dilations_0 = const()[name = string("op_4469_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565502784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568124288))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4454_cast_fp16 = transpose(perm = var_4453, x = attn_output_39_cast_fp16)[name = string("transpose_96")]; + tensor var_4469_cast_fp16 = conv(dilations = var_4469_dilations_0, groups = var_4469_groups_0, pad = var_4469_pad_0, pad_type = var_4469_pad_type_0, strides = var_4469_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4454_cast_fp16)[name = string("op_4469_cast_fp16")]; + tensor var_4473 = const()[name = string("op_4473"), val = tensor([0, 2, 1])]; + int32 var_4479 = const()[name = string("op_4479"), val = int32(-1)]; + fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_131_cast_fp16 = transpose(perm = var_4473, x = var_4469_cast_fp16)[name = string("transpose_95")]; + tensor var_4481_cast_fp16 = mul(x = x_131_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4481_cast_fp16")]; + bool input_195_interleave_0 = const()[name = string("input_195_interleave_0"), val = bool(false)]; + tensor input_195_cast_fp16 = concat(axis = var_4479, interleave = input_195_interleave_0, values = (x_131_cast_fp16, var_4481_cast_fp16))[name = string("input_195_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_4476_to_fp16 = const()[name = string("op_4476_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_4476_to_fp16, x = input_195_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor var_4486_split_sizes_0 = const()[name = string("op_4486_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4486_axis_0 = const()[name = string("op_4486_axis_0"), val = int32(-1)]; + 
tensor var_4486_cast_fp16_0, tensor var_4486_cast_fp16_1 = split(axis = var_4486_axis_0, split_sizes = var_4486_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_4486_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568126912)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_4486_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_119_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_133_cast_fp16")]; + int32 var_4495 = const()[name = string("op_4495"), val = int32(-1)]; + fp16 const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4497_cast_fp16 = mul(x = x_133_cast_fp16, y = const_79_promoted_to_fp16)[name = string("op_4497_cast_fp16")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + tensor input_197_cast_fp16 = concat(axis = var_4495, interleave = input_197_interleave_0, values = (x_133_cast_fp16, var_4497_cast_fp16))[name = string("input_197_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_4492_to_fp16 = const()[name = string("op_4492_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_4492_to_fp16, x = input_197_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_4502_split_sizes_0 = const()[name = string("op_4502_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4502_axis_0 = const()[name = string("op_4502_axis_0"), val = int32(-1)]; + tensor var_4502_cast_fp16_0, tensor var_4502_cast_fp16_1 = split(axis = var_4502_axis_0, split_sizes = var_4502_split_sizes_0, x = 
normed_189_cast_fp16)[name = string("op_4502_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568132096)))]; + tensor h_39_cast_fp16 = mul(x = var_4502_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_4513 = const()[name = string("op_4513"), val = tensor([0, 2, 1])]; + tensor input_199_axes_0 = const()[name = string("input_199_axes_0"), val = tensor([2])]; + tensor var_4514 = transpose(perm = var_4513, x = h_39_cast_fp16)[name = string("transpose_94")]; + tensor input_199 = expand_dims(axes = input_199_axes_0, x = var_4514)[name = string("input_199")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_199)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = 
string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_199)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_201 = mul(x = gate_27, y = up_13)[name = string("input_201")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_201)[name = string("mlp_out_13")]; + tensor var_4554_axes_0 = const()[name = string("op_4554_axes_0"), val = tensor([2])]; + tensor var_4554 = squeeze(axes = var_4554_axes_0, x = mlp_out_13)[name = string("op_4554")]; + tensor var_4558 = const()[name = string("op_4558"), val = tensor([0, 2, 1])]; + int32 var_4564 = const()[name = string("op_4564"), val = int32(-1)]; + fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; + tensor x_135 = transpose(perm = var_4558, x = var_4554)[name = string("transpose_93")]; + tensor var_4566 = mul(x = x_135, y = const_80_promoted)[name = string("op_4566")]; + bool input_203_interleave_0 = const()[name = 
string("input_203_interleave_0"), val = bool(false)]; + tensor input_203 = concat(axis = var_4564, interleave = input_203_interleave_0, values = (x_135, var_4566))[name = string("input_203")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_4561_to_fp16 = const()[name = string("op_4561_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_4561_to_fp16, x = input_203)[name = string("normed_193_cast_fp16")]; + tensor var_4571_split_sizes_0 = const()[name = string("op_4571_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4571_axis_0 = const()[name = string("op_4571_axis_0"), val = int32(-1)]; + tensor var_4571_0, tensor var_4571_1 = split(axis = var_4571_axis_0, split_sizes = var_4571_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_4571")]; + tensor hidden_states_63 = mul(x = var_4571_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 1536])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 1792])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_4599 = const()[name = string("op_4599"), val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_4600 = transpose(perm = var_4599, x = 
hidden_states_65_cast_fp16)[name = string("transpose_92")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_4600)[name = string("input_205")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_205)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_4619 = const()[name = string("op_4619"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_4620_cast_fp16 = transpose(perm = var_4619, x = per_layer_slice_13_cast_fp16)[name = string("transpose_91")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_4620_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_207_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_207_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = 
string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568137280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568465024))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_207_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_4636_axes_0 = const()[name = string("op_4636_axes_0"), val = tensor([2])]; + tensor var_4636_cast_fp16 = squeeze(axes = var_4636_axes_0, x = gated_41_cast_fp16)[name = string("op_4636_cast_fp16")]; + tensor var_4640 = const()[name = string("op_4640"), val = tensor([0, 2, 1])]; + int32 var_4646 = const()[name = string("op_4646"), val = int32(-1)]; + fp16 const_81_promoted_to_fp16 = const()[name = string("const_81_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_137_cast_fp16 = transpose(perm = var_4640, x = var_4636_cast_fp16)[name = string("transpose_90")]; + tensor var_4648_cast_fp16 = mul(x = x_137_cast_fp16, y = const_81_promoted_to_fp16)[name = string("op_4648_cast_fp16")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209_cast_fp16 = concat(axis = var_4646, interleave = input_209_interleave_0, values = (x_137_cast_fp16, var_4648_cast_fp16))[name = string("input_209_cast_fp16")]; + tensor normed_197_axes_0 = const()[name = 
string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_4643_to_fp16 = const()[name = string("op_4643_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_4643_to_fp16, x = input_209_cast_fp16)[name = string("normed_197_cast_fp16")]; + tensor var_4653_split_sizes_0 = const()[name = string("op_4653_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4653_axis_0 = const()[name = string("op_4653_axis_0"), val = int32(-1)]; + tensor var_4653_cast_fp16_0, tensor var_4653_cast_fp16_1 = split(axis = var_4653_axis_0, split_sizes = var_4653_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_4653_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568467648)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_4653_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = tensor([0x1.1ep-1])]; + tensor x_139_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_82_promoted_to_fp16)[name = string("x_139_cast_fp16")]; + tensor var_4665_axes_0 = const()[name = string("op_4665_axes_0"), val = tensor([0])]; + tensor var_4665_cast_fp16 = squeeze(axes = var_4665_axes_0, x = K_sliding_out_11_cast_fp16)[name = string("op_4665_cast_fp16")]; + tensor var_4667_axes_0 = const()[name = string("op_4667_axes_0"), val = tensor([0])]; + tensor var_4667_cast_fp16 = squeeze(axes = var_4667_axes_0, x = V_sliding_out_11_cast_fp16)[name = string("op_4667_cast_fp16")]; + tensor var_4670_begin_0 = 
const()[name = string("op_4670_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4670_end_0 = const()[name = string("op_4670_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_4670_end_mask_0 = const()[name = string("op_4670_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4670_squeeze_mask_0 = const()[name = string("op_4670_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = var_4670_end_0, end_mask = var_4670_end_mask_0, squeeze_mask = var_4670_squeeze_mask_0, x = K_sliding_in)[name = string("op_4670_cast_fp16")]; + tensor K_sliding_slot_13_axes_0 = const()[name = string("K_sliding_slot_13_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_13_cast_fp16 = expand_dims(axes = K_sliding_slot_13_axes_0, x = var_4670_cast_fp16)[name = string("K_sliding_slot_13_cast_fp16")]; + tensor var_4675_begin_0 = const()[name = string("op_4675_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4675_end_0 = const()[name = string("op_4675_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_4675_end_mask_0 = const()[name = string("op_4675_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4675_squeeze_mask_0 = const()[name = string("op_4675_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4675_cast_fp16 = slice_by_index(begin = var_4675_begin_0, end = var_4675_end_0, end_mask = var_4675_end_mask_0, squeeze_mask = var_4675_squeeze_mask_0, x = V_sliding_in)[name = string("op_4675_cast_fp16")]; + tensor V_sliding_slot_13_axes_0 = const()[name = string("V_sliding_slot_13_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_13_cast_fp16 = expand_dims(axes = V_sliding_slot_13_axes_0, x = var_4675_cast_fp16)[name = string("V_sliding_slot_13_cast_fp16")]; + int32 var_4682 = const()[name = string("op_4682"), val = int32(-1)]; + fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = 
fp16(-0x1p+0)]; + tensor var_4684_cast_fp16 = mul(x = x_139_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_4684_cast_fp16")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211_cast_fp16 = concat(axis = var_4682, interleave = input_211_interleave_0, values = (x_139_cast_fp16, var_4684_cast_fp16))[name = string("input_211_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_4679_to_fp16 = const()[name = string("op_4679_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_4679_to_fp16, x = input_211_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_4689_split_sizes_0 = const()[name = string("op_4689_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4689_axis_0 = const()[name = string("op_4689_axis_0"), val = int32(-1)]; + tensor var_4689_cast_fp16_0, tensor var_4689_cast_fp16_1 = split(axis = var_4689_axis_0, split_sizes = var_4689_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_4689_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568472832)))]; + tensor h_43_cast_fp16 = mul(x = var_4689_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_4695 = const()[name = string("op_4695"), val = tensor([0, 2, 1])]; + tensor var_4698_axes_0 = const()[name = string("op_4698_axes_0"), val = tensor([2])]; + tensor var_4696_cast_fp16 = transpose(perm = var_4695, x = h_43_cast_fp16)[name = string("transpose_89")]; + tensor var_4698_cast_fp16 = expand_dims(axes = var_4698_axes_0, x = var_4696_cast_fp16)[name = string("op_4698_cast_fp16")]; + string var_4714_pad_type_0 = const()[name = 
string("op_4714_pad_type_0"), val = string("valid")]; + tensor var_4714_strides_0 = const()[name = string("op_4714_strides_0"), val = tensor([1, 1])]; + tensor var_4714_pad_0 = const()[name = string("op_4714_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4714_dilations_0 = const()[name = string("op_4714_dilations_0"), val = tensor([1, 1])]; + int32 var_4714_groups_0 = const()[name = string("op_4714_groups_0"), val = int32(1)]; + tensor var_4714 = conv(dilations = var_4714_dilations_0, groups = var_4714_groups_0, pad = var_4714_pad_0, pad_type = var_4714_pad_type_0, strides = var_4714_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_4698_cast_fp16)[name = string("op_4714")]; + tensor var_4719 = const()[name = string("op_4719"), val = tensor([1, 8, 256, 1])]; + tensor var_4720 = reshape(shape = var_4719, x = var_4714)[name = string("op_4720")]; + tensor var_4725 = const()[name = string("op_4725"), val = tensor([0, 1, 3, 2])]; + tensor var_4735 = const()[name = string("op_4735"), val = tensor([1, 8, 256])]; + tensor var_4726 = transpose(perm = var_4725, x = var_4720)[name = string("transpose_88")]; + tensor x_141 = reshape(shape = var_4735, x = var_4726)[name = string("x_141")]; + int32 var_4741 = const()[name = string("op_4741"), val = int32(-1)]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_4743 = mul(x = x_141, y = const_84_promoted)[name = string("op_4743")]; + bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)]; + tensor input_215 = concat(axis = var_4741, interleave = input_215_interleave_0, values = (x_141, var_4743))[name = string("input_215")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_4738_to_fp16 = const()[name = string("op_4738_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_4738_to_fp16, x = 
input_215)[name = string("normed_205_cast_fp16")]; + tensor var_4748_split_sizes_0 = const()[name = string("op_4748_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4748_axis_0 = const()[name = string("op_4748_axis_0"), val = int32(-1)]; + tensor var_4748_0, tensor var_4748_1 = split(axis = var_4748_axis_0, split_sizes = var_4748_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_4748")]; + tensor var_4750 = mul(x = var_4748_0, y = layers_7_self_attn_q_norm_weight)[name = string("op_4750")]; + tensor var_4755 = const()[name = string("op_4755"), val = tensor([1, 8, 1, 256])]; + tensor q_59 = reshape(shape = var_4755, x = var_4750)[name = string("q_59")]; + tensor var_4757_cast_fp16 = mul(x = q_59, y = cos_s)[name = string("op_4757_cast_fp16")]; + tensor var_4758_split_sizes_0 = const()[name = string("op_4758_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4758_axis_0 = const()[name = string("op_4758_axis_0"), val = int32(-1)]; + tensor var_4758_0, tensor var_4758_1 = split(axis = var_4758_axis_0, split_sizes = var_4758_split_sizes_0, x = q_59)[name = string("op_4758")]; + fp16 const_85_promoted = const()[name = string("const_85_promoted"), val = fp16(-0x1p+0)]; + tensor var_4760 = mul(x = var_4758_1, y = const_85_promoted)[name = string("op_4760")]; + int32 var_4762 = const()[name = string("op_4762"), val = int32(-1)]; + bool var_4763_interleave_0 = const()[name = string("op_4763_interleave_0"), val = bool(false)]; + tensor var_4763 = concat(axis = var_4762, interleave = var_4763_interleave_0, values = (var_4760, var_4758_0))[name = string("op_4763")]; + tensor var_4764_cast_fp16 = mul(x = var_4763, y = sin_s)[name = string("op_4764_cast_fp16")]; + tensor q_63_cast_fp16 = add(x = var_4757_cast_fp16, y = var_4764_cast_fp16)[name = string("q_63_cast_fp16")]; + string var_4777_pad_type_0 = const()[name = string("op_4777_pad_type_0"), val = string("valid")]; + tensor var_4777_strides_0 = const()[name = string("op_4777_strides_0"), val = tensor([1, 
1])]; + tensor var_4777_pad_0 = const()[name = string("op_4777_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4777_dilations_0 = const()[name = string("op_4777_dilations_0"), val = tensor([1, 1])]; + int32 var_4777_groups_0 = const()[name = string("op_4777_groups_0"), val = int32(1)]; + tensor var_4777 = conv(dilations = var_4777_dilations_0, groups = var_4777_groups_0, pad = var_4777_pad_0, pad_type = var_4777_pad_type_0, strides = var_4777_strides_0, weight = layers_7_self_attn_k_proj_weight_palettized, x = var_4698_cast_fp16)[name = string("op_4777")]; + tensor var_4782 = const()[name = string("op_4782"), val = tensor([1, 2, 256, 1])]; + tensor var_4783 = reshape(shape = var_4782, x = var_4777)[name = string("op_4783")]; + tensor var_4788 = const()[name = string("op_4788"), val = tensor([0, 1, 3, 2])]; + string var_4805_pad_type_0 = const()[name = string("op_4805_pad_type_0"), val = string("valid")]; + tensor var_4805_strides_0 = const()[name = string("op_4805_strides_0"), val = tensor([1, 1])]; + tensor var_4805_pad_0 = const()[name = string("op_4805_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4805_dilations_0 = const()[name = string("op_4805_dilations_0"), val = tensor([1, 1])]; + int32 var_4805_groups_0 = const()[name = string("op_4805_groups_0"), val = int32(1)]; + tensor var_4805 = conv(dilations = var_4805_dilations_0, groups = var_4805_groups_0, pad = var_4805_pad_0, pad_type = var_4805_pad_type_0, strides = var_4805_strides_0, weight = layers_7_self_attn_v_proj_weight_palettized, x = var_4698_cast_fp16)[name = string("op_4805")]; + tensor var_4810 = const()[name = string("op_4810"), val = tensor([1, 2, 256, 1])]; + tensor var_4811 = reshape(shape = var_4810, x = var_4805)[name = string("op_4811")]; + tensor var_4816 = const()[name = string("op_4816"), val = tensor([0, 1, 3, 2])]; + tensor var_4826 = const()[name = string("op_4826"), val = tensor([1, 2, 256])]; + tensor var_4789 = transpose(perm = var_4788, x = var_4783)[name = 
string("transpose_87")]; + tensor x_143 = reshape(shape = var_4826, x = var_4789)[name = string("x_143")]; + int32 var_4832 = const()[name = string("op_4832"), val = int32(-1)]; + fp16 const_86_promoted = const()[name = string("const_86_promoted"), val = fp16(-0x1p+0)]; + tensor var_4834 = mul(x = x_143, y = const_86_promoted)[name = string("op_4834")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217 = concat(axis = var_4832, interleave = input_217_interleave_0, values = (x_143, var_4834))[name = string("input_217")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_4829_to_fp16 = const()[name = string("op_4829_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_4829_to_fp16, x = input_217)[name = string("normed_209_cast_fp16")]; + tensor var_4839_split_sizes_0 = const()[name = string("op_4839_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4839_axis_0 = const()[name = string("op_4839_axis_0"), val = int32(-1)]; + tensor var_4839_0, tensor var_4839_1 = split(axis = var_4839_axis_0, split_sizes = var_4839_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_4839")]; + tensor var_4841 = mul(x = var_4839_0, y = layers_0_self_attn_k_norm_weight)[name = string("op_4841")]; + tensor var_4846 = const()[name = string("op_4846"), val = tensor([1, 2, 1, 256])]; + tensor q_61 = reshape(shape = var_4846, x = var_4841)[name = string("q_61")]; + fp16 var_4848_promoted = const()[name = string("op_4848_promoted"), val = fp16(0x1p+1)]; + tensor var_4817 = transpose(perm = var_4816, x = var_4811)[name = string("transpose_86")]; + tensor var_4849 = pow(x = var_4817, y = var_4848_promoted)[name = string("op_4849")]; + tensor var_4854_axes_0 = const()[name = string("op_4854_axes_0"), val = tensor([-1])]; + bool var_4854_keep_dims_0 = const()[name = string("op_4854_keep_dims_0"), 
val = bool(true)]; + tensor var_4854 = reduce_mean(axes = var_4854_axes_0, keep_dims = var_4854_keep_dims_0, x = var_4849)[name = string("op_4854")]; + fp16 var_4856_to_fp16 = const()[name = string("op_4856_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_15_cast_fp16 = add(x = var_4854, y = var_4856_to_fp16)[name = string("mean_sq_15_cast_fp16")]; + fp32 var_4858_epsilon_0 = const()[name = string("op_4858_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4858_cast_fp16 = rsqrt(epsilon = var_4858_epsilon_0, x = mean_sq_15_cast_fp16)[name = string("op_4858_cast_fp16")]; + tensor input_221_cast_fp16 = mul(x = var_4817, y = var_4858_cast_fp16)[name = string("input_221_cast_fp16")]; + tensor var_4860_cast_fp16 = mul(x = q_61, y = cos_s)[name = string("op_4860_cast_fp16")]; + tensor var_4861_split_sizes_0 = const()[name = string("op_4861_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4861_axis_0 = const()[name = string("op_4861_axis_0"), val = int32(-1)]; + tensor var_4861_0, tensor var_4861_1 = split(axis = var_4861_axis_0, split_sizes = var_4861_split_sizes_0, x = q_61)[name = string("op_4861")]; + fp16 const_87_promoted = const()[name = string("const_87_promoted"), val = fp16(-0x1p+0)]; + tensor var_4863 = mul(x = var_4861_1, y = const_87_promoted)[name = string("op_4863")]; + int32 var_4865 = const()[name = string("op_4865"), val = int32(-1)]; + bool var_4866_interleave_0 = const()[name = string("op_4866_interleave_0"), val = bool(false)]; + tensor var_4866 = concat(axis = var_4865, interleave = var_4866_interleave_0, values = (var_4863, var_4861_0))[name = string("op_4866")]; + tensor var_4867_cast_fp16 = mul(x = var_4866, y = sin_s)[name = string("op_4867_cast_fp16")]; + tensor input_219_cast_fp16 = add(x = var_4860_cast_fp16, y = var_4867_cast_fp16)[name = string("input_219_cast_fp16")]; + tensor k_padded_13_pad_0 = const()[name = string("k_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_13_mode_0 = const()[name = 
string("k_padded_13_mode_0"), val = string("constant")]; + fp16 const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_13_cast_fp16 = pad(constant_val = const_88_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_219_cast_fp16)[name = string("k_padded_13_cast_fp16")]; + tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; + fp16 const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_13_cast_fp16 = pad(constant_val = const_89_to_fp16, mode = v_padded_13_mode_0, pad = v_padded_13_pad_0, x = input_221_cast_fp16)[name = string("v_padded_13_cast_fp16")]; + tensor var_4896_begin_0 = const()[name = string("op_4896_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4896_end_0 = const()[name = string("op_4896_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4896_end_mask_0 = const()[name = string("op_4896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4896_cast_fp16 = slice_by_index(begin = var_4896_begin_0, end = var_4896_end_0, end_mask = var_4896_end_mask_0, x = K_sliding_slot_13_cast_fp16)[name = string("op_4896_cast_fp16")]; + int32 var_4903 = const()[name = string("op_4903"), val = int32(2)]; + bool K_sliding_out_13_interleave_0 = const()[name = string("K_sliding_out_13_interleave_0"), val = bool(false)]; + tensor K_sliding_out_13_cast_fp16 = concat(axis = var_4903, interleave = K_sliding_out_13_interleave_0, values = (var_4896_cast_fp16, k_padded_13_cast_fp16))[name = string("K_sliding_out_13_cast_fp16")]; + tensor var_4919_begin_0 = const()[name = string("op_4919_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4919_end_0 = const()[name = string("op_4919_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4919_end_mask_0 = const()[name = 
string("op_4919_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4919_cast_fp16 = slice_by_index(begin = var_4919_begin_0, end = var_4919_end_0, end_mask = var_4919_end_mask_0, x = V_sliding_slot_13_cast_fp16)[name = string("op_4919_cast_fp16")]; + int32 var_4926 = const()[name = string("op_4926"), val = int32(2)]; + bool V_sliding_out_13_interleave_0 = const()[name = string("V_sliding_out_13_interleave_0"), val = bool(false)]; + tensor V_sliding_out_13_cast_fp16 = concat(axis = var_4926, interleave = V_sliding_out_13_interleave_0, values = (var_4919_cast_fp16, v_padded_13_cast_fp16))[name = string("V_sliding_out_13_cast_fp16")]; + tensor K_for_attn_15_begin_0 = const()[name = string("K_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_15_end_0 = const()[name = string("K_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_15_end_mask_0 = const()[name = string("K_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_15_cast_fp16 = slice_by_index(begin = K_for_attn_15_begin_0, end = K_for_attn_15_end_0, end_mask = K_for_attn_15_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("K_for_attn_15_cast_fp16")]; + tensor V_for_attn_15_begin_0 = const()[name = string("V_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_15_end_0 = const()[name = string("V_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_15_end_mask_0 = const()[name = string("V_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_15_cast_fp16 = slice_by_index(begin = V_for_attn_15_begin_0, end = V_for_attn_15_end_0, end_mask = V_for_attn_15_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("V_for_attn_15_cast_fp16")]; + tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_14_reps_0 = const()[name = string("tile_14_reps_0"), val = tensor([4, 1, 1, 1])]; + 
tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = K_for_attn_15_cast_fp16)[name = string("transpose_85")]; + tensor tile_14_cast_fp16 = tile(reps = tile_14_reps_0, x = transpose_28_cast_fp16)[name = string("tile_14_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_28, x = tile_14_cast_fp16)[name = string("reshape_28_cast_fp16")]; + tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = reshape_28_cast_fp16)[name = string("transpose_84")]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_29, x = transpose_29_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_15_reps_0 = const()[name = string("tile_15_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_30_cast_fp16 = transpose(perm = transpose_30_perm_0, x = V_for_attn_15_cast_fp16)[name = string("transpose_83")]; + tensor tile_15_cast_fp16 = tile(reps = tile_15_reps_0, x = transpose_30_cast_fp16)[name = string("tile_15_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_30, x = tile_15_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_31_cast_fp16 = transpose(perm = transpose_31_perm_0, x = 
reshape_30_cast_fp16)[name = string("transpose_82")]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_31, x = transpose_31_cast_fp16)[name = string("reshape_31_cast_fp16")]; + tensor V_expanded_15_perm_0 = const()[name = string("V_expanded_15_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor transpose_55_cast_fp16 = transpose(perm = transpose_55_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_81")]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_63_cast_fp16, y = transpose_55_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_147_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_7")]; + tensor var_4967 = sub(x = x_147_cast_fp16, y = reduce_max_7)[name = string("op_4967")]; + tensor var_4973 = exp(x = var_4967)[name = string("op_4973")]; + tensor var_4983_axes_0 = const()[name = string("op_4983_axes_0"), val = tensor([-1])]; + bool var_4983_keep_dims_0 = const()[name = string("op_4983_keep_dims_0"), val = bool(true)]; + tensor var_4983 = reduce_sum(axes = var_4983_axes_0, keep_dims = var_4983_keep_dims_0, x = var_4973)[name = string("op_4983")]; + tensor var_4989_cast_fp16 = real_div(x = var_4973, y = var_4983)[name = string("op_4989_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = 
string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor V_expanded_15_cast_fp16 = transpose(perm = V_expanded_15_perm_0, x = reshape_31_cast_fp16)[name = string("transpose_80")]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_4989_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_5000 = const()[name = string("op_5000"), val = tensor([0, 2, 1, 3])]; + tensor var_5007 = const()[name = string("op_5007"), val = tensor([1, 1, -1])]; + tensor var_5001_cast_fp16 = transpose(perm = var_5000, x = attn_output_43_cast_fp16)[name = string("transpose_79")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_5007, x = var_5001_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_5012 = const()[name = string("op_5012"), val = tensor([0, 2, 1])]; + string var_5028_pad_type_0 = const()[name = string("op_5028_pad_type_0"), val = string("valid")]; + int32 var_5028_groups_0 = const()[name = string("op_5028_groups_0"), val = int32(1)]; + tensor var_5028_strides_0 = const()[name = string("op_5028_strides_0"), val = tensor([1])]; + tensor var_5028_pad_0 = const()[name = string("op_5028_pad_0"), val = tensor([0, 0])]; + tensor var_5028_dilations_0 = const()[name = string("op_5028_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568478016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571099520))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5013_cast_fp16 = transpose(perm = var_5012, x = attn_output_45_cast_fp16)[name = string("transpose_78")]; + tensor var_5028_cast_fp16 = 
conv(dilations = var_5028_dilations_0, groups = var_5028_groups_0, pad = var_5028_pad_0, pad_type = var_5028_pad_type_0, strides = var_5028_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5013_cast_fp16)[name = string("op_5028_cast_fp16")]; + tensor var_5032 = const()[name = string("op_5032"), val = tensor([0, 2, 1])]; + int32 var_5038 = const()[name = string("op_5038"), val = int32(-1)]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_151_cast_fp16 = transpose(perm = var_5032, x = var_5028_cast_fp16)[name = string("transpose_77")]; + tensor var_5040_cast_fp16 = mul(x = x_151_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_5040_cast_fp16")]; + bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; + tensor input_225_cast_fp16 = concat(axis = var_5038, interleave = input_225_interleave_0, values = (x_151_cast_fp16, var_5040_cast_fp16))[name = string("input_225_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_5035_to_fp16 = const()[name = string("op_5035_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_5035_to_fp16, x = input_225_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_5045_split_sizes_0 = const()[name = string("op_5045_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5045_axis_0 = const()[name = string("op_5045_axis_0"), val = int32(-1)]; + tensor var_5045_cast_fp16_0, tensor var_5045_cast_fp16_1 = split(axis = var_5045_axis_0, split_sizes = var_5045_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_5045_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(571102144)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_5045_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_139_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_153_cast_fp16")]; + int32 var_5054 = const()[name = string("op_5054"), val = int32(-1)]; + fp16 const_91_promoted_to_fp16 = const()[name = string("const_91_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5056_cast_fp16 = mul(x = x_153_cast_fp16, y = const_91_promoted_to_fp16)[name = string("op_5056_cast_fp16")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227_cast_fp16 = concat(axis = var_5054, interleave = input_227_interleave_0, values = (x_153_cast_fp16, var_5056_cast_fp16))[name = string("input_227_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_5051_to_fp16 = const()[name = string("op_5051_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_5051_to_fp16, x = input_227_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor var_5061_split_sizes_0 = const()[name = string("op_5061_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5061_axis_0 = const()[name = string("op_5061_axis_0"), val = int32(-1)]; + tensor var_5061_cast_fp16_0, tensor var_5061_cast_fp16_1 = split(axis = var_5061_axis_0, split_sizes = var_5061_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_5061_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107328)))]; + tensor h_45_cast_fp16 = mul(x = var_5061_cast_fp16_0, y = 
layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_5072 = const()[name = string("op_5072"), val = tensor([0, 2, 1])]; + tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; + tensor var_5073 = transpose(perm = var_5072, x = h_45_cast_fp16)[name = string("transpose_76")]; + tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_5073)[name = string("input_229")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_229)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_229)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_231 = mul(x = gate_31, y = up_15)[name = string("input_231")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_231)[name = string("mlp_out_15")]; + tensor var_5113_axes_0 = const()[name = string("op_5113_axes_0"), val = tensor([2])]; + tensor var_5113 = squeeze(axes = var_5113_axes_0, x = mlp_out_15)[name = string("op_5113")]; + tensor var_5117 = const()[name = string("op_5117"), val = tensor([0, 2, 1])]; + int32 var_5123 = const()[name = string("op_5123"), val = int32(-1)]; + fp16 const_92_promoted = const()[name = string("const_92_promoted"), val = fp16(-0x1p+0)]; + tensor x_155 = transpose(perm = var_5117, x = var_5113)[name = string("transpose_75")]; + tensor var_5125 = mul(x = x_155, y = const_92_promoted)[name = string("op_5125")]; + bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; + tensor input_233 = concat(axis = var_5123, interleave = input_233_interleave_0, values = (x_155, var_5125))[name = string("input_233")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_5120_to_fp16 = const()[name = string("op_5120_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_5120_to_fp16, x = input_233)[name = string("normed_221_cast_fp16")]; + tensor var_5130_split_sizes_0 = const()[name = string("op_5130_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5130_axis_0 = const()[name = string("op_5130_axis_0"), val = int32(-1)]; + tensor var_5130_0, tensor var_5130_1 = split(axis = var_5130_axis_0, split_sizes = var_5130_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_5130")]; + tensor hidden_states_73 = mul(x = var_5130_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 1792])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 2048])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_5158 = const()[name = string("op_5158"), val = tensor([0, 2, 1])]; + tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; + tensor var_5159 = transpose(perm = var_5158, x = hidden_states_75_cast_fp16)[name = string("transpose_74")]; + tensor input_235 = expand_dims(axes = input_235_axes_0, x = var_5159)[name = string("input_235")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor 
gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_235)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_5178 = const()[name = string("op_5178"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_5179_cast_fp16 = transpose(perm = var_5178, x = per_layer_slice_15_cast_fp16)[name = string("transpose_73")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_5179_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_237_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_237_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(571112512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571440256))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_5195_axes_0 = const()[name = string("op_5195_axes_0"), val = tensor([2])]; + tensor var_5195_cast_fp16 = squeeze(axes = var_5195_axes_0, x = gated_47_cast_fp16)[name = string("op_5195_cast_fp16")]; + tensor var_5199 = const()[name = string("op_5199"), val = tensor([0, 2, 1])]; + int32 var_5205 = const()[name = string("op_5205"), val = int32(-1)]; + fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_157_cast_fp16 = transpose(perm = var_5199, x = var_5195_cast_fp16)[name = string("transpose_72")]; + tensor var_5207_cast_fp16 = mul(x = x_157_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_5207_cast_fp16")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239_cast_fp16 = concat(axis = var_5205, interleave = input_239_interleave_0, values = (x_157_cast_fp16, var_5207_cast_fp16))[name = string("input_239_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_5202_to_fp16 = const()[name = string("op_5202_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_5202_to_fp16, x = input_239_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor var_5212_split_sizes_0 = const()[name = string("op_5212_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_5212_axis_0 = const()[name = string("op_5212_axis_0"), val = int32(-1)]; + tensor var_5212_cast_fp16_0, tensor var_5212_cast_fp16_1 = split(axis = var_5212_axis_0, split_sizes = var_5212_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_5212_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571442880)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_5212_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = tensor([0x1.58p-1])]; + tensor x_159_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_94_promoted_to_fp16)[name = string("x_159_cast_fp16")]; + tensor var_5224_axes_0 = const()[name = string("op_5224_axes_0"), val = tensor([0])]; + tensor var_5224_cast_fp16 = squeeze(axes = var_5224_axes_0, x = K_sliding_out_13_cast_fp16)[name = string("op_5224_cast_fp16")]; + tensor var_5226_axes_0 = const()[name = string("op_5226_axes_0"), val = tensor([0])]; + tensor var_5226_cast_fp16 = squeeze(axes = var_5226_axes_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5226_cast_fp16")]; + tensor var_5229_begin_0 = const()[name = string("op_5229_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5229_end_0 = const()[name = string("op_5229_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5229_end_mask_0 = const()[name = string("op_5229_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5229_squeeze_mask_0 = const()[name = string("op_5229_squeeze_mask_0"), val = tensor([true, false, 
false, false])]; + tensor var_5229_cast_fp16 = slice_by_index(begin = var_5229_begin_0, end = var_5229_end_0, end_mask = var_5229_end_mask_0, squeeze_mask = var_5229_squeeze_mask_0, x = K_sliding_in)[name = string("op_5229_cast_fp16")]; + tensor K_sliding_slot_15_axes_0 = const()[name = string("K_sliding_slot_15_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_15_cast_fp16 = expand_dims(axes = K_sliding_slot_15_axes_0, x = var_5229_cast_fp16)[name = string("K_sliding_slot_15_cast_fp16")]; + tensor var_5234_begin_0 = const()[name = string("op_5234_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5234_end_0 = const()[name = string("op_5234_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5234_end_mask_0 = const()[name = string("op_5234_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5234_squeeze_mask_0 = const()[name = string("op_5234_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5234_cast_fp16 = slice_by_index(begin = var_5234_begin_0, end = var_5234_end_0, end_mask = var_5234_end_mask_0, squeeze_mask = var_5234_squeeze_mask_0, x = V_sliding_in)[name = string("op_5234_cast_fp16")]; + tensor V_sliding_slot_15_axes_0 = const()[name = string("V_sliding_slot_15_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_15_cast_fp16 = expand_dims(axes = V_sliding_slot_15_axes_0, x = var_5234_cast_fp16)[name = string("V_sliding_slot_15_cast_fp16")]; + int32 var_5241 = const()[name = string("op_5241"), val = int32(-1)]; + fp16 const_95_promoted_to_fp16 = const()[name = string("const_95_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5243_cast_fp16 = mul(x = x_159_cast_fp16, y = const_95_promoted_to_fp16)[name = string("op_5243_cast_fp16")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241_cast_fp16 = concat(axis = var_5241, interleave = input_241_interleave_0, values = (x_159_cast_fp16, var_5243_cast_fp16))[name = 
string("input_241_cast_fp16")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_5238_to_fp16 = const()[name = string("op_5238_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_5238_to_fp16, x = input_241_cast_fp16)[name = string("normed_229_cast_fp16")]; + tensor var_5248_split_sizes_0 = const()[name = string("op_5248_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5248_axis_0 = const()[name = string("op_5248_axis_0"), val = int32(-1)]; + tensor var_5248_cast_fp16_0, tensor var_5248_cast_fp16_1 = split(axis = var_5248_axis_0, split_sizes = var_5248_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_5248_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571448064)))]; + tensor h_49_cast_fp16 = mul(x = var_5248_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_5254 = const()[name = string("op_5254"), val = tensor([0, 2, 1])]; + tensor var_5257_axes_0 = const()[name = string("op_5257_axes_0"), val = tensor([2])]; + tensor var_5255_cast_fp16 = transpose(perm = var_5254, x = h_49_cast_fp16)[name = string("transpose_71")]; + tensor var_5257_cast_fp16 = expand_dims(axes = var_5257_axes_0, x = var_5255_cast_fp16)[name = string("op_5257_cast_fp16")]; + string var_5273_pad_type_0 = const()[name = string("op_5273_pad_type_0"), val = string("valid")]; + tensor var_5273_strides_0 = const()[name = string("op_5273_strides_0"), val = tensor([1, 1])]; + tensor var_5273_pad_0 = const()[name = string("op_5273_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5273_dilations_0 = const()[name = string("op_5273_dilations_0"), val = tensor([1, 1])]; + int32 var_5273_groups_0 = const()[name = 
string("op_5273_groups_0"), val = int32(1)]; + tensor var_5273 = conv(dilations = var_5273_dilations_0, groups = var_5273_groups_0, pad = var_5273_pad_0, pad_type = var_5273_pad_type_0, strides = var_5273_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_5257_cast_fp16)[name = string("op_5273")]; + tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 8, 256, 1])]; + tensor var_5279 = reshape(shape = var_5278, x = var_5273)[name = string("op_5279")]; + tensor var_5284 = const()[name = string("op_5284"), val = tensor([0, 1, 3, 2])]; + tensor var_5294 = const()[name = string("op_5294"), val = tensor([1, 8, 256])]; + tensor var_5285 = transpose(perm = var_5284, x = var_5279)[name = string("transpose_70")]; + tensor x_161 = reshape(shape = var_5294, x = var_5285)[name = string("x_161")]; + int32 var_5300 = const()[name = string("op_5300"), val = int32(-1)]; + fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; + tensor var_5302 = mul(x = x_161, y = const_96_promoted)[name = string("op_5302")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245 = concat(axis = var_5300, interleave = input_245_interleave_0, values = (x_161, var_5302))[name = string("input_245")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_5297_to_fp16 = const()[name = string("op_5297_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_5297_to_fp16, x = input_245)[name = string("normed_233_cast_fp16")]; + tensor var_5307_split_sizes_0 = const()[name = string("op_5307_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5307_axis_0 = const()[name = string("op_5307_axis_0"), val = int32(-1)]; + tensor var_5307_0, tensor var_5307_1 = split(axis = var_5307_axis_0, split_sizes = var_5307_split_sizes_0, x = normed_233_cast_fp16)[name = 
string("op_5307")]; + tensor var_5309 = mul(x = var_5307_0, y = layers_8_self_attn_q_norm_weight)[name = string("op_5309")]; + tensor var_5314 = const()[name = string("op_5314"), val = tensor([1, 8, 1, 256])]; + tensor q_67 = reshape(shape = var_5314, x = var_5309)[name = string("q_67")]; + tensor var_5316_cast_fp16 = mul(x = q_67, y = cos_s)[name = string("op_5316_cast_fp16")]; + tensor var_5317_split_sizes_0 = const()[name = string("op_5317_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5317_axis_0 = const()[name = string("op_5317_axis_0"), val = int32(-1)]; + tensor var_5317_0, tensor var_5317_1 = split(axis = var_5317_axis_0, split_sizes = var_5317_split_sizes_0, x = q_67)[name = string("op_5317")]; + fp16 const_97_promoted = const()[name = string("const_97_promoted"), val = fp16(-0x1p+0)]; + tensor var_5319 = mul(x = var_5317_1, y = const_97_promoted)[name = string("op_5319")]; + int32 var_5321 = const()[name = string("op_5321"), val = int32(-1)]; + bool var_5322_interleave_0 = const()[name = string("op_5322_interleave_0"), val = bool(false)]; + tensor var_5322 = concat(axis = var_5321, interleave = var_5322_interleave_0, values = (var_5319, var_5317_0))[name = string("op_5322")]; + tensor var_5323_cast_fp16 = mul(x = var_5322, y = sin_s)[name = string("op_5323_cast_fp16")]; + tensor q_71_cast_fp16 = add(x = var_5316_cast_fp16, y = var_5323_cast_fp16)[name = string("q_71_cast_fp16")]; + string var_5336_pad_type_0 = const()[name = string("op_5336_pad_type_0"), val = string("valid")]; + tensor var_5336_strides_0 = const()[name = string("op_5336_strides_0"), val = tensor([1, 1])]; + tensor var_5336_pad_0 = const()[name = string("op_5336_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5336_dilations_0 = const()[name = string("op_5336_dilations_0"), val = tensor([1, 1])]; + int32 var_5336_groups_0 = const()[name = string("op_5336_groups_0"), val = int32(1)]; + tensor var_5336 = conv(dilations = var_5336_dilations_0, groups = var_5336_groups_0, pad = 
var_5336_pad_0, pad_type = var_5336_pad_type_0, strides = var_5336_strides_0, weight = layers_8_self_attn_k_proj_weight_palettized, x = var_5257_cast_fp16)[name = string("op_5336")]; + tensor var_5341 = const()[name = string("op_5341"), val = tensor([1, 2, 256, 1])]; + tensor var_5342 = reshape(shape = var_5341, x = var_5336)[name = string("op_5342")]; + tensor var_5347 = const()[name = string("op_5347"), val = tensor([0, 1, 3, 2])]; + string var_5364_pad_type_0 = const()[name = string("op_5364_pad_type_0"), val = string("valid")]; + tensor var_5364_strides_0 = const()[name = string("op_5364_strides_0"), val = tensor([1, 1])]; + tensor var_5364_pad_0 = const()[name = string("op_5364_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5364_dilations_0 = const()[name = string("op_5364_dilations_0"), val = tensor([1, 1])]; + int32 var_5364_groups_0 = const()[name = string("op_5364_groups_0"), val = int32(1)]; + tensor var_5364 = conv(dilations = var_5364_dilations_0, groups = var_5364_groups_0, pad = var_5364_pad_0, pad_type = var_5364_pad_type_0, strides = var_5364_strides_0, weight = layers_8_self_attn_v_proj_weight_palettized, x = var_5257_cast_fp16)[name = string("op_5364")]; + tensor var_5369 = const()[name = string("op_5369"), val = tensor([1, 2, 256, 1])]; + tensor var_5370 = reshape(shape = var_5369, x = var_5364)[name = string("op_5370")]; + tensor var_5375 = const()[name = string("op_5375"), val = tensor([0, 1, 3, 2])]; + tensor var_5385 = const()[name = string("op_5385"), val = tensor([1, 2, 256])]; + tensor var_5348 = transpose(perm = var_5347, x = var_5342)[name = string("transpose_69")]; + tensor x_163 = reshape(shape = var_5385, x = var_5348)[name = string("x_163")]; + int32 var_5391 = const()[name = string("op_5391"), val = int32(-1)]; + fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; + tensor var_5393 = mul(x = x_163, y = const_98_promoted)[name = string("op_5393")]; + bool input_247_interleave_0 = 
const()[name = string("input_247_interleave_0"), val = bool(false)]; + tensor input_247 = concat(axis = var_5391, interleave = input_247_interleave_0, values = (x_163, var_5393))[name = string("input_247")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_5388_to_fp16 = const()[name = string("op_5388_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_5388_to_fp16, x = input_247)[name = string("normed_237_cast_fp16")]; + tensor var_5398_split_sizes_0 = const()[name = string("op_5398_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5398_axis_0 = const()[name = string("op_5398_axis_0"), val = int32(-1)]; + tensor var_5398_0, tensor var_5398_1 = split(axis = var_5398_axis_0, split_sizes = var_5398_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_5398")]; + tensor var_5400 = mul(x = var_5398_0, y = layers_8_self_attn_k_norm_weight)[name = string("op_5400")]; + tensor var_5405 = const()[name = string("op_5405"), val = tensor([1, 2, 1, 256])]; + tensor q_69 = reshape(shape = var_5405, x = var_5400)[name = string("q_69")]; + fp16 var_5407_promoted = const()[name = string("op_5407_promoted"), val = fp16(0x1p+1)]; + tensor var_5376 = transpose(perm = var_5375, x = var_5370)[name = string("transpose_68")]; + tensor var_5408 = pow(x = var_5376, y = var_5407_promoted)[name = string("op_5408")]; + tensor var_5413_axes_0 = const()[name = string("op_5413_axes_0"), val = tensor([-1])]; + bool var_5413_keep_dims_0 = const()[name = string("op_5413_keep_dims_0"), val = bool(true)]; + tensor var_5413 = reduce_mean(axes = var_5413_axes_0, keep_dims = var_5413_keep_dims_0, x = var_5408)[name = string("op_5413")]; + fp16 var_5415_to_fp16 = const()[name = string("op_5415_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_17_cast_fp16 = add(x = var_5413, y = var_5415_to_fp16)[name = string("mean_sq_17_cast_fp16")]; + fp32 var_5417_epsilon_0 = 
const()[name = string("op_5417_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5417_cast_fp16 = rsqrt(epsilon = var_5417_epsilon_0, x = mean_sq_17_cast_fp16)[name = string("op_5417_cast_fp16")]; + tensor input_251_cast_fp16 = mul(x = var_5376, y = var_5417_cast_fp16)[name = string("input_251_cast_fp16")]; + tensor var_5419_cast_fp16 = mul(x = q_69, y = cos_s)[name = string("op_5419_cast_fp16")]; + tensor var_5420_split_sizes_0 = const()[name = string("op_5420_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5420_axis_0 = const()[name = string("op_5420_axis_0"), val = int32(-1)]; + tensor var_5420_0, tensor var_5420_1 = split(axis = var_5420_axis_0, split_sizes = var_5420_split_sizes_0, x = q_69)[name = string("op_5420")]; + fp16 const_99_promoted = const()[name = string("const_99_promoted"), val = fp16(-0x1p+0)]; + tensor var_5422 = mul(x = var_5420_1, y = const_99_promoted)[name = string("op_5422")]; + int32 var_5424 = const()[name = string("op_5424"), val = int32(-1)]; + bool var_5425_interleave_0 = const()[name = string("op_5425_interleave_0"), val = bool(false)]; + tensor var_5425 = concat(axis = var_5424, interleave = var_5425_interleave_0, values = (var_5422, var_5420_0))[name = string("op_5425")]; + tensor var_5426_cast_fp16 = mul(x = var_5425, y = sin_s)[name = string("op_5426_cast_fp16")]; + tensor input_249_cast_fp16 = add(x = var_5419_cast_fp16, y = var_5426_cast_fp16)[name = string("input_249_cast_fp16")]; + tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; + fp16 const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_15_cast_fp16 = pad(constant_val = const_100_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_249_cast_fp16)[name = string("k_padded_15_cast_fp16")]; + tensor v_padded_15_pad_0 = const()[name = 
string("v_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; + fp16 const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_15_cast_fp16 = pad(constant_val = const_101_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = input_251_cast_fp16)[name = string("v_padded_15_cast_fp16")]; + tensor var_5455_begin_0 = const()[name = string("op_5455_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5455_end_0 = const()[name = string("op_5455_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5455_end_mask_0 = const()[name = string("op_5455_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5455_cast_fp16 = slice_by_index(begin = var_5455_begin_0, end = var_5455_end_0, end_mask = var_5455_end_mask_0, x = K_sliding_slot_15_cast_fp16)[name = string("op_5455_cast_fp16")]; + int32 var_5462 = const()[name = string("op_5462"), val = int32(2)]; + bool K_sliding_out_15_interleave_0 = const()[name = string("K_sliding_out_15_interleave_0"), val = bool(false)]; + tensor K_sliding_out_15_cast_fp16 = concat(axis = var_5462, interleave = K_sliding_out_15_interleave_0, values = (var_5455_cast_fp16, k_padded_15_cast_fp16))[name = string("K_sliding_out_15_cast_fp16")]; + tensor var_5478_begin_0 = const()[name = string("op_5478_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5478_end_0 = const()[name = string("op_5478_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5478_end_mask_0 = const()[name = string("op_5478_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5478_cast_fp16 = slice_by_index(begin = var_5478_begin_0, end = var_5478_end_0, end_mask = var_5478_end_mask_0, x = V_sliding_slot_15_cast_fp16)[name = string("op_5478_cast_fp16")]; + int32 var_5485 = const()[name = string("op_5485"), val = int32(2)]; + bool V_sliding_out_15_interleave_0 = const()[name = 
string("V_sliding_out_15_interleave_0"), val = bool(false)]; + tensor V_sliding_out_15_cast_fp16 = concat(axis = var_5485, interleave = V_sliding_out_15_interleave_0, values = (var_5478_cast_fp16, v_padded_15_cast_fp16))[name = string("V_sliding_out_15_cast_fp16")]; + tensor K_for_attn_17_begin_0 = const()[name = string("K_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_17_end_0 = const()[name = string("K_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_17_end_mask_0 = const()[name = string("K_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_17_cast_fp16 = slice_by_index(begin = K_for_attn_17_begin_0, end = K_for_attn_17_end_0, end_mask = K_for_attn_17_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("K_for_attn_17_cast_fp16")]; + tensor V_for_attn_17_begin_0 = const()[name = string("V_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_17_end_0 = const()[name = string("V_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_17_end_mask_0 = const()[name = string("V_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_17_cast_fp16 = slice_by_index(begin = V_for_attn_17_begin_0, end = V_for_attn_17_end_0, end_mask = V_for_attn_17_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("V_for_attn_17_cast_fp16")]; + tensor transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_16_reps_0 = const()[name = string("tile_16_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_32_cast_fp16 = transpose(perm = transpose_32_perm_0, x = K_for_attn_17_cast_fp16)[name = string("transpose_67")]; + tensor tile_16_cast_fp16 = tile(reps = tile_16_reps_0, x = transpose_32_cast_fp16)[name = string("tile_16_cast_fp16")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_32_cast_fp16 = 
reshape(shape = concat_32, x = tile_16_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_33 = const()[name = string("concat_33"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_66")]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_33, x = transpose_33_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_17_reps_0 = const()[name = string("tile_17_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_34_cast_fp16 = transpose(perm = transpose_34_perm_0, x = V_for_attn_17_cast_fp16)[name = string("transpose_65")]; + tensor tile_17_cast_fp16 = tile(reps = tile_17_reps_0, x = transpose_34_cast_fp16)[name = string("tile_17_cast_fp16")]; + tensor concat_34 = const()[name = string("concat_34"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_34, x = tile_17_cast_fp16)[name = string("reshape_34_cast_fp16")]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_35_cast_fp16 = transpose(perm = transpose_35_perm_0, x = reshape_34_cast_fp16)[name = string("transpose_64")]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_35, x = transpose_35_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor V_expanded_17_perm_0 = const()[name = string("V_expanded_17_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = 
bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = reshape_33_cast_fp16)[name = string("transpose_63")]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_71_cast_fp16, y = transpose_56_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_167_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_167_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_167_cast_fp16)[name = string("reduce_max_8")]; + tensor var_5526 = sub(x = x_167_cast_fp16, y = reduce_max_8)[name = string("op_5526")]; + tensor var_5532 = exp(x = var_5526)[name = string("op_5532")]; + tensor var_5542_axes_0 = const()[name = string("op_5542_axes_0"), val = tensor([-1])]; + bool var_5542_keep_dims_0 = const()[name = string("op_5542_keep_dims_0"), val = bool(true)]; + tensor var_5542 = reduce_sum(axes = var_5542_axes_0, keep_dims = var_5542_keep_dims_0, x = var_5532)[name = string("op_5542")]; + tensor var_5548_cast_fp16 = real_div(x = var_5532, y = var_5542)[name = string("op_5548_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor V_expanded_17_cast_fp16 = transpose(perm = V_expanded_17_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_62")]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = 
attn_output_49_transpose_y_0, x = var_5548_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_5559 = const()[name = string("op_5559"), val = tensor([0, 2, 1, 3])]; + tensor var_5566 = const()[name = string("op_5566"), val = tensor([1, 1, -1])]; + tensor var_5560_cast_fp16 = transpose(perm = var_5559, x = attn_output_49_cast_fp16)[name = string("transpose_61")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_5566, x = var_5560_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_5571 = const()[name = string("op_5571"), val = tensor([0, 2, 1])]; + string var_5587_pad_type_0 = const()[name = string("op_5587_pad_type_0"), val = string("valid")]; + int32 var_5587_groups_0 = const()[name = string("op_5587_groups_0"), val = int32(1)]; + tensor var_5587_strides_0 = const()[name = string("op_5587_strides_0"), val = tensor([1])]; + tensor var_5587_pad_0 = const()[name = string("op_5587_pad_0"), val = tensor([0, 0])]; + tensor var_5587_dilations_0 = const()[name = string("op_5587_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571453248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574074752))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5572_cast_fp16 = transpose(perm = var_5571, x = attn_output_51_cast_fp16)[name = string("transpose_60")]; + tensor var_5587_cast_fp16 = conv(dilations = var_5587_dilations_0, groups = var_5587_groups_0, pad = var_5587_pad_0, pad_type = var_5587_pad_type_0, strides = var_5587_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5572_cast_fp16)[name = string("op_5587_cast_fp16")]; + tensor var_5591 = const()[name = string("op_5591"), val = tensor([0, 2, 1])]; + int32 var_5597 = const()[name = string("op_5597"), val = 
int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_171_cast_fp16 = transpose(perm = var_5591, x = var_5587_cast_fp16)[name = string("transpose_59")]; + tensor var_5599_cast_fp16 = mul(x = x_171_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_5599_cast_fp16")]; + bool input_255_interleave_0 = const()[name = string("input_255_interleave_0"), val = bool(false)]; + tensor input_255_cast_fp16 = concat(axis = var_5597, interleave = input_255_interleave_0, values = (x_171_cast_fp16, var_5599_cast_fp16))[name = string("input_255_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_5594_to_fp16 = const()[name = string("op_5594_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_5594_to_fp16, x = input_255_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor var_5604_split_sizes_0 = const()[name = string("op_5604_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5604_axis_0 = const()[name = string("op_5604_axis_0"), val = int32(-1)]; + tensor var_5604_cast_fp16_0, tensor var_5604_cast_fp16_1 = split(axis = var_5604_axis_0, split_sizes = var_5604_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_5604_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574077376)))]; + tensor attn_output_53_cast_fp16 = mul(x = var_5604_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor x_173_cast_fp16 = add(x = x_159_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_173_cast_fp16")]; + int32 var_5613 = const()[name = string("op_5613"), val = int32(-1)]; + fp16 
const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5615_cast_fp16 = mul(x = x_173_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_5615_cast_fp16")]; + bool input_257_interleave_0 = const()[name = string("input_257_interleave_0"), val = bool(false)]; + tensor input_257_cast_fp16 = concat(axis = var_5613, interleave = input_257_interleave_0, values = (x_173_cast_fp16, var_5615_cast_fp16))[name = string("input_257_cast_fp16")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_5610_to_fp16 = const()[name = string("op_5610_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_5610_to_fp16, x = input_257_cast_fp16)[name = string("normed_245_cast_fp16")]; + tensor var_5620_split_sizes_0 = const()[name = string("op_5620_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5620_axis_0 = const()[name = string("op_5620_axis_0"), val = int32(-1)]; + tensor var_5620_cast_fp16_0, tensor var_5620_cast_fp16_1 = split(axis = var_5620_axis_0, split_sizes = var_5620_split_sizes_0, x = normed_245_cast_fp16)[name = string("op_5620_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574082560)))]; + tensor h_51_cast_fp16 = mul(x = var_5620_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_5631 = const()[name = string("op_5631"), val = tensor([0, 2, 1])]; + tensor input_259_axes_0 = const()[name = string("input_259_axes_0"), val = tensor([2])]; + tensor var_5632 = transpose(perm = var_5631, x = h_51_cast_fp16)[name = string("transpose_58")]; + tensor input_259 = expand_dims(axes = input_259_axes_0, x = 
var_5632)[name = string("input_259")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_259)[name = string("gate_33")]; + string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; + tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; + tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; + int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; + tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_259)[name = string("up_17")]; + string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; + tensor input_261 = mul(x = gate_35, y = up_17)[name = string("input_261")]; + string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; + tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_17_pad_0 = const()[name = 
string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; + tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_261)[name = string("mlp_out_17")]; + tensor var_5672_axes_0 = const()[name = string("op_5672_axes_0"), val = tensor([2])]; + tensor var_5672 = squeeze(axes = var_5672_axes_0, x = mlp_out_17)[name = string("op_5672")]; + tensor var_5676 = const()[name = string("op_5676"), val = tensor([0, 2, 1])]; + int32 var_5682 = const()[name = string("op_5682"), val = int32(-1)]; + fp16 const_104_promoted = const()[name = string("const_104_promoted"), val = fp16(-0x1p+0)]; + tensor x_175 = transpose(perm = var_5676, x = var_5672)[name = string("transpose_57")]; + tensor var_5684 = mul(x = x_175, y = const_104_promoted)[name = string("op_5684")]; + bool input_263_interleave_0 = const()[name = string("input_263_interleave_0"), val = bool(false)]; + tensor input_263 = concat(axis = var_5682, interleave = input_263_interleave_0, values = (x_175, var_5684))[name = string("input_263")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_5679_to_fp16 = const()[name = string("op_5679_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_5679_to_fp16, x = input_263)[name = string("normed_249_cast_fp16")]; + tensor var_5689_split_sizes_0 = const()[name = string("op_5689_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5689_axis_0 = const()[name = string("op_5689_axis_0"), val = int32(-1)]; + tensor var_5689_0, tensor var_5689_1 = split(axis = var_5689_axis_0, split_sizes = 
var_5689_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_5689")]; + tensor hidden_states_83 = mul(x = var_5689_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_173_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor([0, 0, 2048])]; + tensor per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor([1, 1, 2304])]; + tensor per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_17_cast_fp16")]; + tensor var_5717 = const()[name = string("op_5717"), val = tensor([0, 2, 1])]; + tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; + tensor var_5718 = transpose(perm = var_5717, x = hidden_states_85_cast_fp16)[name = string("transpose_56")]; + tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_5718)[name = string("input_265")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = 
layers_8_per_layer_input_gate_weight_palettized, x = input_265)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_5737 = const()[name = string("op_5737"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor([2])]; + tensor var_5738_cast_fp16 = transpose(perm = var_5737, x = per_layer_slice_17_cast_fp16)[name = string("transpose_55")]; + tensor per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_5738_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")]; + tensor input_267_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_267_cast_fp16")]; + string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")]; + tensor gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor([1, 1])]; + tensor gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor([1, 1])]; + int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574087744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574415488))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = 
layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_267_cast_fp16)[name = string("gated_53_cast_fp16")]; + tensor var_5754_axes_0 = const()[name = string("op_5754_axes_0"), val = tensor([2])]; + tensor var_5754_cast_fp16 = squeeze(axes = var_5754_axes_0, x = gated_53_cast_fp16)[name = string("op_5754_cast_fp16")]; + tensor var_5758 = const()[name = string("op_5758"), val = tensor([0, 2, 1])]; + int32 var_5764 = const()[name = string("op_5764"), val = int32(-1)]; + fp16 const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_177_cast_fp16 = transpose(perm = var_5758, x = var_5754_cast_fp16)[name = string("transpose_54")]; + tensor var_5766_cast_fp16 = mul(x = x_177_cast_fp16, y = const_105_promoted_to_fp16)[name = string("op_5766_cast_fp16")]; + bool input_269_interleave_0 = const()[name = string("input_269_interleave_0"), val = bool(false)]; + tensor input_269_cast_fp16 = concat(axis = var_5764, interleave = input_269_interleave_0, values = (x_177_cast_fp16, var_5766_cast_fp16))[name = string("input_269_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_5761_to_fp16 = const()[name = string("op_5761_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_5761_to_fp16, x = input_269_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor var_5771_split_sizes_0 = const()[name = string("op_5771_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5771_axis_0 = const()[name = string("op_5771_axis_0"), val = int32(-1)]; + tensor var_5771_cast_fp16_0, tensor var_5771_cast_fp16_1 = split(axis = var_5771_axis_0, split_sizes = var_5771_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_5771_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574418112)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_5771_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; + tensor const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = tensor([0x1.d4p-3])]; + tensor x_179_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_106_promoted_to_fp16)[name = string("x_179_cast_fp16")]; + tensor var_5783_axes_0 = const()[name = string("op_5783_axes_0"), val = tensor([0])]; + tensor var_5783_cast_fp16 = squeeze(axes = var_5783_axes_0, x = K_sliding_out_15_cast_fp16)[name = string("op_5783_cast_fp16")]; + tensor var_5785_axes_0 = const()[name = string("op_5785_axes_0"), val = tensor([0])]; + tensor var_5785_cast_fp16 = squeeze(axes = var_5785_axes_0, x = V_sliding_out_15_cast_fp16)[name = string("op_5785_cast_fp16")]; + tensor var_5788_begin_0 = const()[name = string("op_5788_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5788_end_0 = const()[name = string("op_5788_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_5788_end_mask_0 = const()[name = string("op_5788_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5788_squeeze_mask_0 = const()[name = string("op_5788_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5788_cast_fp16 = slice_by_index(begin = var_5788_begin_0, end = var_5788_end_0, end_mask = var_5788_end_mask_0, squeeze_mask = var_5788_squeeze_mask_0, x = K_sliding_in)[name = string("op_5788_cast_fp16")]; + tensor K_sliding_slot_17_axes_0 = const()[name = string("K_sliding_slot_17_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_17_cast_fp16 = 
expand_dims(axes = K_sliding_slot_17_axes_0, x = var_5788_cast_fp16)[name = string("K_sliding_slot_17_cast_fp16")]; + tensor var_5793_begin_0 = const()[name = string("op_5793_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5793_end_0 = const()[name = string("op_5793_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_5793_end_mask_0 = const()[name = string("op_5793_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5793_squeeze_mask_0 = const()[name = string("op_5793_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5793_cast_fp16 = slice_by_index(begin = var_5793_begin_0, end = var_5793_end_0, end_mask = var_5793_end_mask_0, squeeze_mask = var_5793_squeeze_mask_0, x = V_sliding_in)[name = string("op_5793_cast_fp16")]; + tensor V_sliding_slot_17_axes_0 = const()[name = string("V_sliding_slot_17_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_17_cast_fp16 = expand_dims(axes = V_sliding_slot_17_axes_0, x = var_5793_cast_fp16)[name = string("V_sliding_slot_17_cast_fp16")]; + int32 var_5800 = const()[name = string("op_5800"), val = int32(-1)]; + fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5802_cast_fp16 = mul(x = x_179_cast_fp16, y = const_107_promoted_to_fp16)[name = string("op_5802_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_5800, interleave = input_271_interleave_0, values = (x_179_cast_fp16, var_5802_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_5797_to_fp16 = const()[name = string("op_5797_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_5797_to_fp16, x = input_271_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor 
var_5807_split_sizes_0 = const()[name = string("op_5807_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5807_axis_0 = const()[name = string("op_5807_axis_0"), val = int32(-1)]; + tensor var_5807_cast_fp16_0, tensor var_5807_cast_fp16_1 = split(axis = var_5807_axis_0, split_sizes = var_5807_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_5807_cast_fp16")]; + tensor layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574423296)))]; + tensor h_55_cast_fp16 = mul(x = var_5807_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")]; + tensor var_5813 = const()[name = string("op_5813"), val = tensor([0, 2, 1])]; + tensor var_5816_axes_0 = const()[name = string("op_5816_axes_0"), val = tensor([2])]; + tensor var_5814_cast_fp16 = transpose(perm = var_5813, x = h_55_cast_fp16)[name = string("transpose_53")]; + tensor var_5816_cast_fp16 = expand_dims(axes = var_5816_axes_0, x = var_5814_cast_fp16)[name = string("op_5816_cast_fp16")]; + string var_5832_pad_type_0 = const()[name = string("op_5832_pad_type_0"), val = string("valid")]; + tensor var_5832_strides_0 = const()[name = string("op_5832_strides_0"), val = tensor([1, 1])]; + tensor var_5832_pad_0 = const()[name = string("op_5832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5832_dilations_0 = const()[name = string("op_5832_dilations_0"), val = tensor([1, 1])]; + int32 var_5832_groups_0 = const()[name = string("op_5832_groups_0"), val = int32(1)]; + tensor var_5832 = conv(dilations = var_5832_dilations_0, groups = var_5832_groups_0, pad = var_5832_pad_0, pad_type = var_5832_pad_type_0, strides = var_5832_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_5816_cast_fp16)[name = string("op_5832")]; + tensor var_5837 = const()[name = string("op_5837"), val = tensor([1, 8, 256, 
1])]; + tensor var_5838 = reshape(shape = var_5837, x = var_5832)[name = string("op_5838")]; + tensor var_5843 = const()[name = string("op_5843"), val = tensor([0, 1, 3, 2])]; + tensor var_5853 = const()[name = string("op_5853"), val = tensor([1, 8, 256])]; + tensor var_5844 = transpose(perm = var_5843, x = var_5838)[name = string("transpose_52")]; + tensor x_181 = reshape(shape = var_5853, x = var_5844)[name = string("x_181")]; + int32 var_5859 = const()[name = string("op_5859"), val = int32(-1)]; + fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)]; + tensor var_5861 = mul(x = x_181, y = const_108_promoted)[name = string("op_5861")]; + bool input_275_interleave_0 = const()[name = string("input_275_interleave_0"), val = bool(false)]; + tensor input_275 = concat(axis = var_5859, interleave = input_275_interleave_0, values = (x_181, var_5861))[name = string("input_275")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_5856_to_fp16 = const()[name = string("op_5856_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_5856_to_fp16, x = input_275)[name = string("normed_261_cast_fp16")]; + tensor var_5866_split_sizes_0 = const()[name = string("op_5866_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5866_axis_0 = const()[name = string("op_5866_axis_0"), val = int32(-1)]; + tensor var_5866_0, tensor var_5866_1 = split(axis = var_5866_axis_0, split_sizes = var_5866_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_5866")]; + tensor var_5868 = mul(x = var_5866_0, y = layers_9_self_attn_q_norm_weight)[name = string("op_5868")]; + tensor var_5873 = const()[name = string("op_5873"), val = tensor([1, 8, 1, 256])]; + tensor q_75 = reshape(shape = var_5873, x = var_5868)[name = string("q_75")]; + tensor var_5875_cast_fp16 = mul(x = q_75, y = cos_s)[name = string("op_5875_cast_fp16")]; + tensor 
var_5876_split_sizes_0 = const()[name = string("op_5876_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5876_axis_0 = const()[name = string("op_5876_axis_0"), val = int32(-1)]; + tensor var_5876_0, tensor var_5876_1 = split(axis = var_5876_axis_0, split_sizes = var_5876_split_sizes_0, x = q_75)[name = string("op_5876")]; + fp16 const_109_promoted = const()[name = string("const_109_promoted"), val = fp16(-0x1p+0)]; + tensor var_5878 = mul(x = var_5876_1, y = const_109_promoted)[name = string("op_5878")]; + int32 var_5880 = const()[name = string("op_5880"), val = int32(-1)]; + bool var_5881_interleave_0 = const()[name = string("op_5881_interleave_0"), val = bool(false)]; + tensor var_5881 = concat(axis = var_5880, interleave = var_5881_interleave_0, values = (var_5878, var_5876_0))[name = string("op_5881")]; + tensor var_5882_cast_fp16 = mul(x = var_5881, y = sin_s)[name = string("op_5882_cast_fp16")]; + tensor q_79_cast_fp16 = add(x = var_5875_cast_fp16, y = var_5882_cast_fp16)[name = string("q_79_cast_fp16")]; + string var_5895_pad_type_0 = const()[name = string("op_5895_pad_type_0"), val = string("valid")]; + tensor var_5895_strides_0 = const()[name = string("op_5895_strides_0"), val = tensor([1, 1])]; + tensor var_5895_pad_0 = const()[name = string("op_5895_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5895_dilations_0 = const()[name = string("op_5895_dilations_0"), val = tensor([1, 1])]; + int32 var_5895_groups_0 = const()[name = string("op_5895_groups_0"), val = int32(1)]; + tensor var_5895 = conv(dilations = var_5895_dilations_0, groups = var_5895_groups_0, pad = var_5895_pad_0, pad_type = var_5895_pad_type_0, strides = var_5895_strides_0, weight = layers_9_self_attn_k_proj_weight_palettized, x = var_5816_cast_fp16)[name = string("op_5895")]; + tensor var_5900 = const()[name = string("op_5900"), val = tensor([1, 2, 256, 1])]; + tensor var_5901 = reshape(shape = var_5900, x = var_5895)[name = string("op_5901")]; + tensor var_5906 = const()[name = 
string("op_5906"), val = tensor([0, 1, 3, 2])]; + string var_5923_pad_type_0 = const()[name = string("op_5923_pad_type_0"), val = string("valid")]; + tensor var_5923_strides_0 = const()[name = string("op_5923_strides_0"), val = tensor([1, 1])]; + tensor var_5923_pad_0 = const()[name = string("op_5923_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5923_dilations_0 = const()[name = string("op_5923_dilations_0"), val = tensor([1, 1])]; + int32 var_5923_groups_0 = const()[name = string("op_5923_groups_0"), val = int32(1)]; + tensor var_5923 = conv(dilations = var_5923_dilations_0, groups = var_5923_groups_0, pad = var_5923_pad_0, pad_type = var_5923_pad_type_0, strides = var_5923_strides_0, weight = layers_9_self_attn_v_proj_weight_palettized, x = var_5816_cast_fp16)[name = string("op_5923")]; + tensor var_5928 = const()[name = string("op_5928"), val = tensor([1, 2, 256, 1])]; + tensor var_5929 = reshape(shape = var_5928, x = var_5923)[name = string("op_5929")]; + tensor var_5934 = const()[name = string("op_5934"), val = tensor([0, 1, 3, 2])]; + tensor var_5944 = const()[name = string("op_5944"), val = tensor([1, 2, 256])]; + tensor var_5907 = transpose(perm = var_5906, x = var_5901)[name = string("transpose_51")]; + tensor x_183 = reshape(shape = var_5944, x = var_5907)[name = string("x_183")]; + int32 var_5950 = const()[name = string("op_5950"), val = int32(-1)]; + fp16 const_110_promoted = const()[name = string("const_110_promoted"), val = fp16(-0x1p+0)]; + tensor var_5952 = mul(x = x_183, y = const_110_promoted)[name = string("op_5952")]; + bool input_277_interleave_0 = const()[name = string("input_277_interleave_0"), val = bool(false)]; + tensor input_277 = concat(axis = var_5950, interleave = input_277_interleave_0, values = (x_183, var_5952))[name = string("input_277")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_5947_to_fp16 = const()[name = string("op_5947_to_fp16"), val = fp16(0x1.1p-20)]; + 
tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_5947_to_fp16, x = input_277)[name = string("normed_265_cast_fp16")]; + tensor var_5957_split_sizes_0 = const()[name = string("op_5957_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5957_axis_0 = const()[name = string("op_5957_axis_0"), val = int32(-1)]; + tensor var_5957_0, tensor var_5957_1 = split(axis = var_5957_axis_0, split_sizes = var_5957_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_5957")]; + tensor var_5959 = mul(x = var_5957_0, y = layers_9_self_attn_k_norm_weight)[name = string("op_5959")]; + tensor var_5964 = const()[name = string("op_5964"), val = tensor([1, 2, 1, 256])]; + tensor q_77 = reshape(shape = var_5964, x = var_5959)[name = string("q_77")]; + fp16 var_5966_promoted = const()[name = string("op_5966_promoted"), val = fp16(0x1p+1)]; + tensor var_5935 = transpose(perm = var_5934, x = var_5929)[name = string("transpose_50")]; + tensor var_5967 = pow(x = var_5935, y = var_5966_promoted)[name = string("op_5967")]; + tensor var_5972_axes_0 = const()[name = string("op_5972_axes_0"), val = tensor([-1])]; + bool var_5972_keep_dims_0 = const()[name = string("op_5972_keep_dims_0"), val = bool(true)]; + tensor var_5972 = reduce_mean(axes = var_5972_axes_0, keep_dims = var_5972_keep_dims_0, x = var_5967)[name = string("op_5972")]; + fp16 var_5974_to_fp16 = const()[name = string("op_5974_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_19_cast_fp16 = add(x = var_5972, y = var_5974_to_fp16)[name = string("mean_sq_19_cast_fp16")]; + fp32 var_5976_epsilon_0 = const()[name = string("op_5976_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5976_cast_fp16 = rsqrt(epsilon = var_5976_epsilon_0, x = mean_sq_19_cast_fp16)[name = string("op_5976_cast_fp16")]; + tensor input_281_cast_fp16 = mul(x = var_5935, y = var_5976_cast_fp16)[name = string("input_281_cast_fp16")]; + tensor var_5978_cast_fp16 = mul(x = q_77, y = cos_s)[name = 
string("op_5978_cast_fp16")]; + tensor var_5979_split_sizes_0 = const()[name = string("op_5979_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5979_axis_0 = const()[name = string("op_5979_axis_0"), val = int32(-1)]; + tensor var_5979_0, tensor var_5979_1 = split(axis = var_5979_axis_0, split_sizes = var_5979_split_sizes_0, x = q_77)[name = string("op_5979")]; + fp16 const_111_promoted = const()[name = string("const_111_promoted"), val = fp16(-0x1p+0)]; + tensor var_5981 = mul(x = var_5979_1, y = const_111_promoted)[name = string("op_5981")]; + int32 var_5983 = const()[name = string("op_5983"), val = int32(-1)]; + bool var_5984_interleave_0 = const()[name = string("op_5984_interleave_0"), val = bool(false)]; + tensor var_5984 = concat(axis = var_5983, interleave = var_5984_interleave_0, values = (var_5981, var_5979_0))[name = string("op_5984")]; + tensor var_5985_cast_fp16 = mul(x = var_5984, y = sin_s)[name = string("op_5985_cast_fp16")]; + tensor input_279_cast_fp16 = add(x = var_5978_cast_fp16, y = var_5985_cast_fp16)[name = string("input_279_cast_fp16")]; + tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; + fp16 const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_17_cast_fp16 = pad(constant_val = const_112_to_fp16, mode = k_padded_17_mode_0, pad = k_padded_17_pad_0, x = input_279_cast_fp16)[name = string("k_padded_17_cast_fp16")]; + tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; + fp16 const_113_to_fp16 = const()[name = string("const_113_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_17_cast_fp16 = pad(constant_val = const_113_to_fp16, mode = v_padded_17_mode_0, pad = 
v_padded_17_pad_0, x = input_281_cast_fp16)[name = string("v_padded_17_cast_fp16")]; + tensor var_6014_begin_0 = const()[name = string("op_6014_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6014_end_0 = const()[name = string("op_6014_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6014_end_mask_0 = const()[name = string("op_6014_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6014_cast_fp16 = slice_by_index(begin = var_6014_begin_0, end = var_6014_end_0, end_mask = var_6014_end_mask_0, x = K_sliding_slot_17_cast_fp16)[name = string("op_6014_cast_fp16")]; + int32 var_6021 = const()[name = string("op_6021"), val = int32(2)]; + bool K_sliding_out_17_interleave_0 = const()[name = string("K_sliding_out_17_interleave_0"), val = bool(false)]; + tensor K_sliding_out_17_cast_fp16 = concat(axis = var_6021, interleave = K_sliding_out_17_interleave_0, values = (var_6014_cast_fp16, k_padded_17_cast_fp16))[name = string("K_sliding_out_17_cast_fp16")]; + tensor var_6037_begin_0 = const()[name = string("op_6037_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6037_end_0 = const()[name = string("op_6037_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6037_end_mask_0 = const()[name = string("op_6037_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6037_cast_fp16 = slice_by_index(begin = var_6037_begin_0, end = var_6037_end_0, end_mask = var_6037_end_mask_0, x = V_sliding_slot_17_cast_fp16)[name = string("op_6037_cast_fp16")]; + int32 var_6044 = const()[name = string("op_6044"), val = int32(2)]; + bool V_sliding_out_17_interleave_0 = const()[name = string("V_sliding_out_17_interleave_0"), val = bool(false)]; + tensor V_sliding_out_17_cast_fp16 = concat(axis = var_6044, interleave = V_sliding_out_17_interleave_0, values = (var_6037_cast_fp16, v_padded_17_cast_fp16))[name = string("V_sliding_out_17_cast_fp16")]; + tensor K_for_attn_19_begin_0 = const()[name = string("K_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor K_for_attn_19_end_0 = const()[name = string("K_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_19_end_mask_0 = const()[name = string("K_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_19_cast_fp16 = slice_by_index(begin = K_for_attn_19_begin_0, end = K_for_attn_19_end_0, end_mask = K_for_attn_19_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("K_for_attn_19_cast_fp16")]; + tensor V_for_attn_19_begin_0 = const()[name = string("V_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_19_end_0 = const()[name = string("V_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_19_end_mask_0 = const()[name = string("V_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_19_cast_fp16 = slice_by_index(begin = V_for_attn_19_begin_0, end = V_for_attn_19_end_0, end_mask = V_for_attn_19_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("V_for_attn_19_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_18_reps_0 = const()[name = string("tile_18_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = K_for_attn_19_cast_fp16)[name = string("transpose_49")]; + tensor tile_18_cast_fp16 = tile(reps = tile_18_reps_0, x = transpose_36_cast_fp16)[name = string("tile_18_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_36, x = tile_18_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = 
reshape_36_cast_fp16)[name = string("transpose_48")]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_37, x = transpose_37_cast_fp16)[name = string("reshape_37_cast_fp16")]; + tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_19_reps_0 = const()[name = string("tile_19_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = V_for_attn_19_cast_fp16)[name = string("transpose_47")]; + tensor tile_19_cast_fp16 = tile(reps = tile_19_reps_0, x = transpose_38_cast_fp16)[name = string("tile_19_cast_fp16")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_38, x = tile_19_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_39_cast_fp16 = transpose(perm = transpose_39_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_46")]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_39, x = transpose_39_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor V_expanded_19_perm_0 = const()[name = string("V_expanded_19_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)]; + bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)]; + tensor transpose_57_cast_fp16 = transpose(perm = transpose_57_perm_0, x = reshape_37_cast_fp16)[name = string("transpose_45")]; + tensor attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = 
attn_weights_37_transpose_y_0, x = q_79_cast_fp16, y = transpose_57_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_sliding)[name = string("x_187_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_187_cast_fp16)[name = string("reduce_max_9")]; + tensor var_6085 = sub(x = x_187_cast_fp16, y = reduce_max_9)[name = string("op_6085")]; + tensor var_6091 = exp(x = var_6085)[name = string("op_6091")]; + tensor var_6101_axes_0 = const()[name = string("op_6101_axes_0"), val = tensor([-1])]; + bool var_6101_keep_dims_0 = const()[name = string("op_6101_keep_dims_0"), val = bool(true)]; + tensor var_6101 = reduce_sum(axes = var_6101_axes_0, keep_dims = var_6101_keep_dims_0, x = var_6091)[name = string("op_6101")]; + tensor var_6107_cast_fp16 = real_div(x = var_6091, y = var_6101)[name = string("op_6107_cast_fp16")]; + bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; + bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; + tensor V_expanded_19_cast_fp16 = transpose(perm = V_expanded_19_perm_0, x = reshape_39_cast_fp16)[name = string("transpose_44")]; + tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_6107_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_6118 = const()[name = string("op_6118"), val = tensor([0, 2, 1, 3])]; + tensor var_6125 = const()[name = string("op_6125"), val = tensor([1, 1, -1])]; + tensor var_6119_cast_fp16 = transpose(perm = var_6118, x = 
attn_output_55_cast_fp16)[name = string("transpose_43")]; + tensor attn_output_57_cast_fp16 = reshape(shape = var_6125, x = var_6119_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_6130 = const()[name = string("op_6130"), val = tensor([0, 2, 1])]; + string var_6146_pad_type_0 = const()[name = string("op_6146_pad_type_0"), val = string("valid")]; + int32 var_6146_groups_0 = const()[name = string("op_6146_groups_0"), val = int32(1)]; + tensor var_6146_strides_0 = const()[name = string("op_6146_strides_0"), val = tensor([1])]; + tensor var_6146_pad_0 = const()[name = string("op_6146_pad_0"), val = tensor([0, 0])]; + tensor var_6146_dilations_0 = const()[name = string("op_6146_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574428480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577049984))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6131_cast_fp16 = transpose(perm = var_6130, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; + tensor var_6146_cast_fp16 = conv(dilations = var_6146_dilations_0, groups = var_6146_groups_0, pad = var_6146_pad_0, pad_type = var_6146_pad_type_0, strides = var_6146_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6131_cast_fp16)[name = string("op_6146_cast_fp16")]; + tensor var_6150 = const()[name = string("op_6150"), val = tensor([0, 2, 1])]; + int32 var_6156 = const()[name = string("op_6156"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_191_cast_fp16 = transpose(perm = var_6150, x = var_6146_cast_fp16)[name = string("transpose_41")]; + tensor var_6158_cast_fp16 = mul(x = x_191_cast_fp16, y = const_114_promoted_to_fp16)[name = 
string("op_6158_cast_fp16")]; + bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; + tensor input_285_cast_fp16 = concat(axis = var_6156, interleave = input_285_interleave_0, values = (x_191_cast_fp16, var_6158_cast_fp16))[name = string("input_285_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_6153_to_fp16 = const()[name = string("op_6153_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_6153_to_fp16, x = input_285_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor var_6163_split_sizes_0 = const()[name = string("op_6163_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6163_axis_0 = const()[name = string("op_6163_axis_0"), val = int32(-1)]; + tensor var_6163_cast_fp16_0, tensor var_6163_cast_fp16_1 = split(axis = var_6163_axis_0, split_sizes = var_6163_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_6163_cast_fp16")]; + tensor layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577052608)))]; + tensor attn_output_59_cast_fp16 = mul(x = var_6163_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor x_193_cast_fp16 = add(x = x_179_cast_fp16, y = attn_output_59_cast_fp16)[name = string("x_193_cast_fp16")]; + int32 var_6172 = const()[name = string("op_6172"), val = int32(-1)]; + fp16 const_115_promoted_to_fp16 = const()[name = string("const_115_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6174_cast_fp16 = mul(x = x_193_cast_fp16, y = const_115_promoted_to_fp16)[name = string("op_6174_cast_fp16")]; + bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; + 
tensor input_287_cast_fp16 = concat(axis = var_6172, interleave = input_287_interleave_0, values = (x_193_cast_fp16, var_6174_cast_fp16))[name = string("input_287_cast_fp16")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_6169_to_fp16 = const()[name = string("op_6169_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_6169_to_fp16, x = input_287_cast_fp16)[name = string("normed_273_cast_fp16")]; + tensor var_6179_split_sizes_0 = const()[name = string("op_6179_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6179_axis_0 = const()[name = string("op_6179_axis_0"), val = int32(-1)]; + tensor var_6179_cast_fp16_0, tensor var_6179_cast_fp16_1 = split(axis = var_6179_axis_0, split_sizes = var_6179_split_sizes_0, x = normed_273_cast_fp16)[name = string("op_6179_cast_fp16")]; + tensor layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577057792)))]; + tensor h_57_cast_fp16 = mul(x = var_6179_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")]; + tensor var_6190 = const()[name = string("op_6190"), val = tensor([0, 2, 1])]; + tensor input_289_axes_0 = const()[name = string("input_289_axes_0"), val = tensor([2])]; + tensor var_6191 = transpose(perm = var_6190, x = h_57_cast_fp16)[name = string("transpose_40")]; + tensor input_289 = expand_dims(axes = input_289_axes_0, x = var_6191)[name = string("input_289")]; + string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; + tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; + tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; + int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; + tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_289)[name = string("gate_37")]; + string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; + tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; + tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; + int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; + tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_289)[name = string("up_19")]; + string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; + tensor input_291 = mul(x = gate_39, y = up_19)[name = string("input_291")]; + string mlp_out_19_pad_type_0 = const()[name = string("mlp_out_19_pad_type_0"), val = string("valid")]; + tensor mlp_out_19_strides_0 = const()[name = string("mlp_out_19_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_19_pad_0 = const()[name = string("mlp_out_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_19_dilations_0 = const()[name = string("mlp_out_19_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_19_groups_0 = const()[name = string("mlp_out_19_groups_0"), val = int32(1)]; + tensor mlp_out_19 = conv(dilations = mlp_out_19_dilations_0, groups = 
mlp_out_19_groups_0, pad = mlp_out_19_pad_0, pad_type = mlp_out_19_pad_type_0, strides = mlp_out_19_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_291)[name = string("mlp_out_19")]; + tensor var_6231_axes_0 = const()[name = string("op_6231_axes_0"), val = tensor([2])]; + tensor var_6231 = squeeze(axes = var_6231_axes_0, x = mlp_out_19)[name = string("op_6231")]; + tensor var_6235 = const()[name = string("op_6235"), val = tensor([0, 2, 1])]; + int32 var_6241 = const()[name = string("op_6241"), val = int32(-1)]; + fp16 const_116_promoted = const()[name = string("const_116_promoted"), val = fp16(-0x1p+0)]; + tensor x_195 = transpose(perm = var_6235, x = var_6231)[name = string("transpose_39")]; + tensor var_6243 = mul(x = x_195, y = const_116_promoted)[name = string("op_6243")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293 = concat(axis = var_6241, interleave = input_293_interleave_0, values = (x_195, var_6243))[name = string("input_293")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_6238_to_fp16 = const()[name = string("op_6238_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_6238_to_fp16, x = input_293)[name = string("normed_277_cast_fp16")]; + tensor var_6248_split_sizes_0 = const()[name = string("op_6248_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6248_axis_0 = const()[name = string("op_6248_axis_0"), val = int32(-1)]; + tensor var_6248_0, tensor var_6248_1 = split(axis = var_6248_axis_0, split_sizes = var_6248_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_6248")]; + tensor hidden_states_93 = mul(x = var_6248_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")]; + tensor hidden_states_95_cast_fp16 = add(x = x_193_cast_fp16, y = hidden_states_93)[name = 
string("hidden_states_95_cast_fp16")]; + tensor per_layer_slice_19_begin_0 = const()[name = string("per_layer_slice_19_begin_0"), val = tensor([0, 0, 2304])]; + tensor per_layer_slice_19_end_0 = const()[name = string("per_layer_slice_19_end_0"), val = tensor([1, 1, 2560])]; + tensor per_layer_slice_19_end_mask_0 = const()[name = string("per_layer_slice_19_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_19_cast_fp16 = slice_by_index(begin = per_layer_slice_19_begin_0, end = per_layer_slice_19_end_0, end_mask = per_layer_slice_19_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_19_cast_fp16")]; + tensor var_6276 = const()[name = string("op_6276"), val = tensor([0, 2, 1])]; + tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; + tensor var_6277 = transpose(perm = var_6276, x = hidden_states_95_cast_fp16)[name = string("transpose_38")]; + tensor input_295 = expand_dims(axes = input_295_axes_0, x = var_6277)[name = string("input_295")]; + string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")]; + tensor gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor([1, 1])]; + tensor gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor([1, 1])]; + int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)]; + tensor gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_295)[name = string("gated_55")]; + string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")]; + tensor 
var_6296 = const()[name = string("op_6296"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_19_axes_0 = const()[name = string("per_layer_slice_conv_19_axes_0"), val = tensor([2])]; + tensor var_6297_cast_fp16 = transpose(perm = var_6296, x = per_layer_slice_19_cast_fp16)[name = string("transpose_37")]; + tensor per_layer_slice_conv_19_cast_fp16 = expand_dims(axes = per_layer_slice_conv_19_axes_0, x = var_6297_cast_fp16)[name = string("per_layer_slice_conv_19_cast_fp16")]; + tensor input_297_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_19_cast_fp16)[name = string("input_297_cast_fp16")]; + string gated_59_pad_type_0 = const()[name = string("gated_59_pad_type_0"), val = string("valid")]; + tensor gated_59_strides_0 = const()[name = string("gated_59_strides_0"), val = tensor([1, 1])]; + tensor gated_59_pad_0 = const()[name = string("gated_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_59_dilations_0 = const()[name = string("gated_59_dilations_0"), val = tensor([1, 1])]; + int32 gated_59_groups_0 = const()[name = string("gated_59_groups_0"), val = int32(1)]; + tensor layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577062976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577390720))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_59_cast_fp16 = conv(dilations = gated_59_dilations_0, groups = gated_59_groups_0, pad = gated_59_pad_0, pad_type = gated_59_pad_type_0, strides = gated_59_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("gated_59_cast_fp16")]; + tensor var_6313_axes_0 = const()[name = string("op_6313_axes_0"), val = tensor([2])]; + tensor var_6313_cast_fp16 = squeeze(axes = var_6313_axes_0, x = gated_59_cast_fp16)[name = 
string("op_6313_cast_fp16")]; + tensor var_6317 = const()[name = string("op_6317"), val = tensor([0, 2, 1])]; + int32 var_6323 = const()[name = string("op_6323"), val = int32(-1)]; + fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_197_cast_fp16 = transpose(perm = var_6317, x = var_6313_cast_fp16)[name = string("transpose_36")]; + tensor var_6325_cast_fp16 = mul(x = x_197_cast_fp16, y = const_117_promoted_to_fp16)[name = string("op_6325_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_6323, interleave = input_299_interleave_0, values = (x_197_cast_fp16, var_6325_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_6320_to_fp16 = const()[name = string("op_6320_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_6320_to_fp16, x = input_299_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor var_6330_split_sizes_0 = const()[name = string("op_6330_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6330_axis_0 = const()[name = string("op_6330_axis_0"), val = int32(-1)]; + tensor var_6330_cast_fp16_0, tensor var_6330_cast_fp16_1 = split(axis = var_6330_axis_0, split_sizes = var_6330_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_6330_cast_fp16")]; + tensor layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577393344)))]; + tensor hidden_states_99_cast_fp16 = mul(x = var_6330_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor 
hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = tensor([0x1.a8p-2])]; + tensor x_199_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_118_promoted_to_fp16)[name = string("x_199_cast_fp16")]; + tensor var_6342_axes_0 = const()[name = string("op_6342_axes_0"), val = tensor([0])]; + tensor var_6342_cast_fp16 = squeeze(axes = var_6342_axes_0, x = K_sliding_out_17_cast_fp16)[name = string("op_6342_cast_fp16")]; + tensor var_6344_axes_0 = const()[name = string("op_6344_axes_0"), val = tensor([0])]; + tensor var_6344_cast_fp16 = squeeze(axes = var_6344_axes_0, x = V_sliding_out_17_cast_fp16)[name = string("op_6344_cast_fp16")]; + tensor var_6347_begin_0 = const()[name = string("op_6347_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6347_end_0 = const()[name = string("op_6347_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6347_end_mask_0 = const()[name = string("op_6347_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6347_squeeze_mask_0 = const()[name = string("op_6347_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6347_cast_fp16 = slice_by_index(begin = var_6347_begin_0, end = var_6347_end_0, end_mask = var_6347_end_mask_0, squeeze_mask = var_6347_squeeze_mask_0, x = K_sliding_in)[name = string("op_6347_cast_fp16")]; + tensor K_sliding_slot_axes_0 = const()[name = string("K_sliding_slot_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_cast_fp16 = expand_dims(axes = K_sliding_slot_axes_0, x = var_6347_cast_fp16)[name = string("K_sliding_slot_cast_fp16")]; + tensor var_6352_begin_0 = const()[name = string("op_6352_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6352_end_0 = const()[name = string("op_6352_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6352_end_mask_0 = const()[name = 
string("op_6352_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6352_squeeze_mask_0 = const()[name = string("op_6352_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6352_cast_fp16 = slice_by_index(begin = var_6352_begin_0, end = var_6352_end_0, end_mask = var_6352_end_mask_0, squeeze_mask = var_6352_squeeze_mask_0, x = V_sliding_in)[name = string("op_6352_cast_fp16")]; + tensor V_sliding_slot_axes_0 = const()[name = string("V_sliding_slot_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_cast_fp16 = expand_dims(axes = V_sliding_slot_axes_0, x = var_6352_cast_fp16)[name = string("V_sliding_slot_cast_fp16")]; + int32 var_6359 = const()[name = string("op_6359"), val = int32(-1)]; + fp16 const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6361_cast_fp16 = mul(x = x_199_cast_fp16, y = const_119_promoted_to_fp16)[name = string("op_6361_cast_fp16")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301_cast_fp16 = concat(axis = var_6359, interleave = input_301_interleave_0, values = (x_199_cast_fp16, var_6361_cast_fp16))[name = string("input_301_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_6356_to_fp16 = const()[name = string("op_6356_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_6356_to_fp16, x = input_301_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor var_6366_split_sizes_0 = const()[name = string("op_6366_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6366_axis_0 = const()[name = string("op_6366_axis_0"), val = int32(-1)]; + tensor var_6366_cast_fp16_0, tensor var_6366_cast_fp16_1 = split(axis = var_6366_axis_0, split_sizes = var_6366_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_6366_cast_fp16")]; + tensor 
layers_10_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577398528)))]; + tensor h_61_cast_fp16 = mul(x = var_6366_cast_fp16_0, y = layers_10_input_layernorm_weight_promoted_to_fp16)[name = string("h_61_cast_fp16")]; + tensor var_6372 = const()[name = string("op_6372"), val = tensor([0, 2, 1])]; + tensor var_6375_axes_0 = const()[name = string("op_6375_axes_0"), val = tensor([2])]; + tensor var_6373_cast_fp16 = transpose(perm = var_6372, x = h_61_cast_fp16)[name = string("transpose_35")]; + tensor var_6375_cast_fp16 = expand_dims(axes = var_6375_axes_0, x = var_6373_cast_fp16)[name = string("op_6375_cast_fp16")]; + string var_6391_pad_type_0 = const()[name = string("op_6391_pad_type_0"), val = string("valid")]; + tensor var_6391_strides_0 = const()[name = string("op_6391_strides_0"), val = tensor([1, 1])]; + tensor var_6391_pad_0 = const()[name = string("op_6391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6391_dilations_0 = const()[name = string("op_6391_dilations_0"), val = tensor([1, 1])]; + int32 var_6391_groups_0 = const()[name = string("op_6391_groups_0"), val = int32(1)]; + tensor var_6391 = conv(dilations = var_6391_dilations_0, groups = var_6391_groups_0, pad = var_6391_pad_0, pad_type = var_6391_pad_type_0, strides = var_6391_strides_0, weight = layers_10_self_attn_q_proj_weight_palettized, x = var_6375_cast_fp16)[name = string("op_6391")]; + tensor var_6396 = const()[name = string("op_6396"), val = tensor([1, 8, 256, 1])]; + tensor var_6397 = reshape(shape = var_6396, x = var_6391)[name = string("op_6397")]; + tensor var_6402 = const()[name = string("op_6402"), val = tensor([0, 1, 3, 2])]; + tensor var_6412 = const()[name = string("op_6412"), val = tensor([1, 8, 256])]; + tensor var_6403 = transpose(perm = var_6402, x = var_6397)[name = string("transpose_34")]; + tensor x_201 = reshape(shape = 
var_6412, x = var_6403)[name = string("x_201")]; + int32 var_6418 = const()[name = string("op_6418"), val = int32(-1)]; + fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; + tensor var_6420 = mul(x = x_201, y = const_120_promoted)[name = string("op_6420")]; + bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; + tensor input_305 = concat(axis = var_6418, interleave = input_305_interleave_0, values = (x_201, var_6420))[name = string("input_305")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_6415_to_fp16 = const()[name = string("op_6415_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_6415_to_fp16, x = input_305)[name = string("normed_289_cast_fp16")]; + tensor var_6425_split_sizes_0 = const()[name = string("op_6425_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6425_axis_0 = const()[name = string("op_6425_axis_0"), val = int32(-1)]; + tensor var_6425_0, tensor var_6425_1 = split(axis = var_6425_axis_0, split_sizes = var_6425_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_6425")]; + tensor var_6427 = mul(x = var_6425_0, y = layers_3_self_attn_q_norm_weight)[name = string("op_6427")]; + tensor var_6432 = const()[name = string("op_6432"), val = tensor([1, 8, 1, 256])]; + tensor q_83 = reshape(shape = var_6432, x = var_6427)[name = string("q_83")]; + tensor var_6434_cast_fp16 = mul(x = q_83, y = cos_s)[name = string("op_6434_cast_fp16")]; + tensor var_6435_split_sizes_0 = const()[name = string("op_6435_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6435_axis_0 = const()[name = string("op_6435_axis_0"), val = int32(-1)]; + tensor var_6435_0, tensor var_6435_1 = split(axis = var_6435_axis_0, split_sizes = var_6435_split_sizes_0, x = q_83)[name = string("op_6435")]; + fp16 const_121_promoted = const()[name = 
string("const_121_promoted"), val = fp16(-0x1p+0)]; + tensor var_6437 = mul(x = var_6435_1, y = const_121_promoted)[name = string("op_6437")]; + int32 var_6439 = const()[name = string("op_6439"), val = int32(-1)]; + bool var_6440_interleave_0 = const()[name = string("op_6440_interleave_0"), val = bool(false)]; + tensor var_6440 = concat(axis = var_6439, interleave = var_6440_interleave_0, values = (var_6437, var_6435_0))[name = string("op_6440")]; + tensor var_6441_cast_fp16 = mul(x = var_6440, y = sin_s)[name = string("op_6441_cast_fp16")]; + tensor q_87_cast_fp16 = add(x = var_6434_cast_fp16, y = var_6441_cast_fp16)[name = string("q_87_cast_fp16")]; + string var_6454_pad_type_0 = const()[name = string("op_6454_pad_type_0"), val = string("valid")]; + tensor var_6454_strides_0 = const()[name = string("op_6454_strides_0"), val = tensor([1, 1])]; + tensor var_6454_pad_0 = const()[name = string("op_6454_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6454_dilations_0 = const()[name = string("op_6454_dilations_0"), val = tensor([1, 1])]; + int32 var_6454_groups_0 = const()[name = string("op_6454_groups_0"), val = int32(1)]; + tensor var_6454 = conv(dilations = var_6454_dilations_0, groups = var_6454_groups_0, pad = var_6454_pad_0, pad_type = var_6454_pad_type_0, strides = var_6454_strides_0, weight = layers_10_self_attn_k_proj_weight_palettized, x = var_6375_cast_fp16)[name = string("op_6454")]; + tensor var_6459 = const()[name = string("op_6459"), val = tensor([1, 2, 256, 1])]; + tensor var_6460 = reshape(shape = var_6459, x = var_6454)[name = string("op_6460")]; + tensor var_6465 = const()[name = string("op_6465"), val = tensor([0, 1, 3, 2])]; + string var_6482_pad_type_0 = const()[name = string("op_6482_pad_type_0"), val = string("valid")]; + tensor var_6482_strides_0 = const()[name = string("op_6482_strides_0"), val = tensor([1, 1])]; + tensor var_6482_pad_0 = const()[name = string("op_6482_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6482_dilations_0 = 
const()[name = string("op_6482_dilations_0"), val = tensor([1, 1])]; + int32 var_6482_groups_0 = const()[name = string("op_6482_groups_0"), val = int32(1)]; + tensor var_6482 = conv(dilations = var_6482_dilations_0, groups = var_6482_groups_0, pad = var_6482_pad_0, pad_type = var_6482_pad_type_0, strides = var_6482_strides_0, weight = layers_10_self_attn_v_proj_weight_palettized, x = var_6375_cast_fp16)[name = string("op_6482")]; + tensor var_6487 = const()[name = string("op_6487"), val = tensor([1, 2, 256, 1])]; + tensor var_6488 = reshape(shape = var_6487, x = var_6482)[name = string("op_6488")]; + tensor var_6493 = const()[name = string("op_6493"), val = tensor([0, 1, 3, 2])]; + tensor var_6503 = const()[name = string("op_6503"), val = tensor([1, 2, 256])]; + tensor var_6466 = transpose(perm = var_6465, x = var_6460)[name = string("transpose_33")]; + tensor x_203 = reshape(shape = var_6503, x = var_6466)[name = string("x_203")]; + int32 var_6509 = const()[name = string("op_6509"), val = int32(-1)]; + fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_6511 = mul(x = x_203, y = const_122_promoted)[name = string("op_6511")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307 = concat(axis = var_6509, interleave = input_307_interleave_0, values = (x_203, var_6511))[name = string("input_307")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_6506_to_fp16 = const()[name = string("op_6506_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_6506_to_fp16, x = input_307)[name = string("normed_293_cast_fp16")]; + tensor var_6516_split_sizes_0 = const()[name = string("op_6516_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6516_axis_0 = const()[name = string("op_6516_axis_0"), val = int32(-1)]; + tensor var_6516_0, tensor 
var_6516_1 = split(axis = var_6516_axis_0, split_sizes = var_6516_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_6516")]; + tensor var_6518 = mul(x = var_6516_0, y = layers_6_self_attn_k_norm_weight)[name = string("op_6518")]; + tensor var_6523 = const()[name = string("op_6523"), val = tensor([1, 2, 1, 256])]; + tensor q_85 = reshape(shape = var_6523, x = var_6518)[name = string("q_85")]; + fp16 var_6525_promoted = const()[name = string("op_6525_promoted"), val = fp16(0x1p+1)]; + tensor var_6494 = transpose(perm = var_6493, x = var_6488)[name = string("transpose_32")]; + tensor var_6526 = pow(x = var_6494, y = var_6525_promoted)[name = string("op_6526")]; + tensor var_6531_axes_0 = const()[name = string("op_6531_axes_0"), val = tensor([-1])]; + bool var_6531_keep_dims_0 = const()[name = string("op_6531_keep_dims_0"), val = bool(true)]; + tensor var_6531 = reduce_mean(axes = var_6531_axes_0, keep_dims = var_6531_keep_dims_0, x = var_6526)[name = string("op_6531")]; + fp16 var_6533_to_fp16 = const()[name = string("op_6533_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_21_cast_fp16 = add(x = var_6531, y = var_6533_to_fp16)[name = string("mean_sq_21_cast_fp16")]; + fp32 var_6535_epsilon_0 = const()[name = string("op_6535_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6535_cast_fp16 = rsqrt(epsilon = var_6535_epsilon_0, x = mean_sq_21_cast_fp16)[name = string("op_6535_cast_fp16")]; + tensor input_311_cast_fp16 = mul(x = var_6494, y = var_6535_cast_fp16)[name = string("input_311_cast_fp16")]; + tensor var_6537_cast_fp16 = mul(x = q_85, y = cos_s)[name = string("op_6537_cast_fp16")]; + tensor var_6538_split_sizes_0 = const()[name = string("op_6538_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6538_axis_0 = const()[name = string("op_6538_axis_0"), val = int32(-1)]; + tensor var_6538_0, tensor var_6538_1 = split(axis = var_6538_axis_0, split_sizes = var_6538_split_sizes_0, x = q_85)[name = string("op_6538")]; + fp16 const_123_promoted = 
const()[name = string("const_123_promoted"), val = fp16(-0x1p+0)]; + tensor var_6540 = mul(x = var_6538_1, y = const_123_promoted)[name = string("op_6540")]; + int32 var_6542 = const()[name = string("op_6542"), val = int32(-1)]; + bool var_6543_interleave_0 = const()[name = string("op_6543_interleave_0"), val = bool(false)]; + tensor var_6543 = concat(axis = var_6542, interleave = var_6543_interleave_0, values = (var_6540, var_6538_0))[name = string("op_6543")]; + tensor var_6544_cast_fp16 = mul(x = var_6543, y = sin_s)[name = string("op_6544_cast_fp16")]; + tensor input_309_cast_fp16 = add(x = var_6537_cast_fp16, y = var_6544_cast_fp16)[name = string("input_309_cast_fp16")]; + tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), val = string("constant")]; + fp16 const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_cast_fp16 = pad(constant_val = const_124_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_309_cast_fp16)[name = string("k_padded_cast_fp16")]; + tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; + fp16 const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_cast_fp16 = pad(constant_val = const_125_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = input_311_cast_fp16)[name = string("v_padded_cast_fp16")]; + tensor var_6573_begin_0 = const()[name = string("op_6573_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6573_end_0 = const()[name = string("op_6573_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6573_end_mask_0 = const()[name = string("op_6573_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6573_cast_fp16 = 
slice_by_index(begin = var_6573_begin_0, end = var_6573_end_0, end_mask = var_6573_end_mask_0, x = K_sliding_slot_cast_fp16)[name = string("op_6573_cast_fp16")]; + int32 var_6580 = const()[name = string("op_6580"), val = int32(2)]; + bool K_sliding_out_interleave_0 = const()[name = string("K_sliding_out_interleave_0"), val = bool(false)]; + tensor K_sliding_out_cast_fp16 = concat(axis = var_6580, interleave = K_sliding_out_interleave_0, values = (var_6573_cast_fp16, k_padded_cast_fp16))[name = string("K_sliding_out_cast_fp16")]; + tensor var_6596_begin_0 = const()[name = string("op_6596_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6596_end_0 = const()[name = string("op_6596_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6596_end_mask_0 = const()[name = string("op_6596_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6596_cast_fp16 = slice_by_index(begin = var_6596_begin_0, end = var_6596_end_0, end_mask = var_6596_end_mask_0, x = V_sliding_slot_cast_fp16)[name = string("op_6596_cast_fp16")]; + int32 var_6603 = const()[name = string("op_6603"), val = int32(2)]; + bool V_sliding_out_interleave_0 = const()[name = string("V_sliding_out_interleave_0"), val = bool(false)]; + tensor V_sliding_out_cast_fp16 = concat(axis = var_6603, interleave = V_sliding_out_interleave_0, values = (var_6596_cast_fp16, v_padded_cast_fp16))[name = string("V_sliding_out_cast_fp16")]; + tensor K_for_attn_21_begin_0 = const()[name = string("K_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_21_end_0 = const()[name = string("K_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_21_end_mask_0 = const()[name = string("K_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_21_cast_fp16 = slice_by_index(begin = K_for_attn_21_begin_0, end = K_for_attn_21_end_0, end_mask = K_for_attn_21_end_mask_0, x = K_sliding_out_cast_fp16)[name = string("K_for_attn_21_cast_fp16")]; + tensor 
V_for_attn_21_begin_0 = const()[name = string("V_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_21_end_0 = const()[name = string("V_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_21_end_mask_0 = const()[name = string("V_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_21_cast_fp16 = slice_by_index(begin = V_for_attn_21_begin_0, end = V_for_attn_21_end_0, end_mask = V_for_attn_21_end_mask_0, x = V_sliding_out_cast_fp16)[name = string("V_for_attn_21_cast_fp16")]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_20_reps_0 = const()[name = string("tile_20_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = K_for_attn_21_cast_fp16)[name = string("transpose_31")]; + tensor tile_20_cast_fp16 = tile(reps = tile_20_reps_0, x = transpose_40_cast_fp16)[name = string("tile_20_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_40, x = tile_20_cast_fp16)[name = string("reshape_40_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_40_cast_fp16)[name = string("transpose_30")]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_41, x = transpose_41_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_21_reps_0 = const()[name = string("tile_21_reps_0"), val = tensor([4, 1, 1, 1])]; + 
tensor transpose_42_cast_fp16 = transpose(perm = transpose_42_perm_0, x = V_for_attn_21_cast_fp16)[name = string("transpose_29")]; + tensor tile_21_cast_fp16 = tile(reps = tile_21_reps_0, x = transpose_42_cast_fp16)[name = string("tile_21_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_42, x = tile_21_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_43_cast_fp16 = transpose(perm = transpose_43_perm_0, x = reshape_42_cast_fp16)[name = string("transpose_28")]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_43, x = transpose_43_cast_fp16)[name = string("reshape_43_cast_fp16")]; + tensor V_expanded_21_perm_0 = const()[name = string("V_expanded_21_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor transpose_58_cast_fp16 = transpose(perm = transpose_58_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_27")]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = q_87_cast_fp16, y = transpose_58_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask_sliding)[name = string("x_207_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10 = reduce_max(axes = 
reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_207_cast_fp16)[name = string("reduce_max_10")]; + tensor var_6644 = sub(x = x_207_cast_fp16, y = reduce_max_10)[name = string("op_6644")]; + tensor var_6650 = exp(x = var_6644)[name = string("op_6650")]; + tensor var_6660_axes_0 = const()[name = string("op_6660_axes_0"), val = tensor([-1])]; + bool var_6660_keep_dims_0 = const()[name = string("op_6660_keep_dims_0"), val = bool(true)]; + tensor var_6660 = reduce_sum(axes = var_6660_axes_0, keep_dims = var_6660_keep_dims_0, x = var_6650)[name = string("op_6660")]; + tensor var_6666_cast_fp16 = real_div(x = var_6650, y = var_6660)[name = string("op_6666_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor V_expanded_21_cast_fp16 = transpose(perm = V_expanded_21_perm_0, x = reshape_43_cast_fp16)[name = string("transpose_26")]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_6666_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_6677 = const()[name = string("op_6677"), val = tensor([0, 2, 1, 3])]; + tensor var_6684 = const()[name = string("op_6684"), val = tensor([1, 1, -1])]; + tensor var_6678_cast_fp16 = transpose(perm = var_6677, x = attn_output_61_cast_fp16)[name = string("transpose_25")]; + tensor attn_output_63_cast_fp16 = reshape(shape = var_6684, x = var_6678_cast_fp16)[name = string("attn_output_63_cast_fp16")]; + tensor var_6689 = const()[name = string("op_6689"), val = tensor([0, 2, 1])]; + string var_6705_pad_type_0 = const()[name = string("op_6705_pad_type_0"), val = string("valid")]; + int32 var_6705_groups_0 = const()[name = string("op_6705_groups_0"), val = int32(1)]; + tensor var_6705_strides_0 = const()[name 
= string("op_6705_strides_0"), val = tensor([1])]; + tensor var_6705_pad_0 = const()[name = string("op_6705_pad_0"), val = tensor([0, 0])]; + tensor var_6705_dilations_0 = const()[name = string("op_6705_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577403712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580025216))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6690_cast_fp16 = transpose(perm = var_6689, x = attn_output_63_cast_fp16)[name = string("transpose_24")]; + tensor var_6705_cast_fp16 = conv(dilations = var_6705_dilations_0, groups = var_6705_groups_0, pad = var_6705_pad_0, pad_type = var_6705_pad_type_0, strides = var_6705_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6690_cast_fp16)[name = string("op_6705_cast_fp16")]; + tensor var_6709 = const()[name = string("op_6709"), val = tensor([0, 2, 1])]; + int32 var_6715 = const()[name = string("op_6715"), val = int32(-1)]; + fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_211_cast_fp16 = transpose(perm = var_6709, x = var_6705_cast_fp16)[name = string("transpose_23")]; + tensor var_6717_cast_fp16 = mul(x = x_211_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_6717_cast_fp16")]; + bool input_315_interleave_0 = const()[name = string("input_315_interleave_0"), val = bool(false)]; + tensor input_315_cast_fp16 = concat(axis = var_6715, interleave = input_315_interleave_0, values = (x_211_cast_fp16, var_6717_cast_fp16))[name = string("input_315_cast_fp16")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_6712_to_fp16 = const()[name = string("op_6712_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_6712_to_fp16, x = input_315_cast_fp16)[name = string("normed_297_cast_fp16")]; + tensor var_6722_split_sizes_0 = const()[name = string("op_6722_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6722_axis_0 = const()[name = string("op_6722_axis_0"), val = int32(-1)]; + tensor var_6722_cast_fp16_0, tensor var_6722_cast_fp16_1 = split(axis = var_6722_axis_0, split_sizes = var_6722_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_6722_cast_fp16")]; + tensor layers_10_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580027840)))]; + tensor attn_output_65_cast_fp16 = mul(x = var_6722_cast_fp16_0, y = layers_10_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_199_cast_fp16, y = attn_output_65_cast_fp16)[name = string("x_213_cast_fp16")]; + int32 var_6731 = const()[name = string("op_6731"), val = int32(-1)]; + fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6733_cast_fp16 = mul(x = x_213_cast_fp16, y = const_127_promoted_to_fp16)[name = string("op_6733_cast_fp16")]; + bool input_317_interleave_0 = const()[name = string("input_317_interleave_0"), val = bool(false)]; + tensor input_317_cast_fp16 = concat(axis = var_6731, interleave = input_317_interleave_0, values = (x_213_cast_fp16, var_6733_cast_fp16))[name = string("input_317_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_6728_to_fp16 = const()[name = string("op_6728_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_6728_to_fp16, x = input_317_cast_fp16)[name = 
string("normed_301_cast_fp16")]; + tensor var_6738_split_sizes_0 = const()[name = string("op_6738_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6738_axis_0 = const()[name = string("op_6738_axis_0"), val = int32(-1)]; + tensor var_6738_cast_fp16_0, tensor var_6738_cast_fp16_1 = split(axis = var_6738_axis_0, split_sizes = var_6738_split_sizes_0, x = normed_301_cast_fp16)[name = string("op_6738_cast_fp16")]; + tensor layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580033024)))]; + tensor h_63_cast_fp16 = mul(x = var_6738_cast_fp16_0, y = layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_63_cast_fp16")]; + tensor var_6749 = const()[name = string("op_6749"), val = tensor([0, 2, 1])]; + tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; + tensor var_6750 = transpose(perm = var_6749, x = h_63_cast_fp16)[name = string("transpose_22")]; + tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_6750)[name = string("input_319")]; + string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; + tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; + tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; + int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; + tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight_palettized, x = input_319)[name = string("gate_41")]; + string up_21_pad_type_0 = const()[name = 
string("up_21_pad_type_0"), val = string("valid")]; + tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; + tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; + int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; + tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight_palettized, x = input_319)[name = string("up_21")]; + string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; + tensor input_321 = mul(x = gate_43, y = up_21)[name = string("input_321")]; + string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; + tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; + tensor mlp_out_21 = conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_10_mlp_down_proj_weight_palettized, x = input_321)[name = string("mlp_out_21")]; + tensor var_6790_axes_0 = const()[name = string("op_6790_axes_0"), val = tensor([2])]; + tensor var_6790 = squeeze(axes = var_6790_axes_0, x = mlp_out_21)[name = string("op_6790")]; + tensor var_6794 = const()[name = string("op_6794"), val = tensor([0, 2, 1])]; + int32 var_6800 = 
const()[name = string("op_6800"), val = int32(-1)]; + fp16 const_128_promoted = const()[name = string("const_128_promoted"), val = fp16(-0x1p+0)]; + tensor x_215 = transpose(perm = var_6794, x = var_6790)[name = string("transpose_21")]; + tensor var_6802 = mul(x = x_215, y = const_128_promoted)[name = string("op_6802")]; + bool input_323_interleave_0 = const()[name = string("input_323_interleave_0"), val = bool(false)]; + tensor input_323 = concat(axis = var_6800, interleave = input_323_interleave_0, values = (x_215, var_6802))[name = string("input_323")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_6797_to_fp16 = const()[name = string("op_6797_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_6797_to_fp16, x = input_323)[name = string("normed_305_cast_fp16")]; + tensor var_6807_split_sizes_0 = const()[name = string("op_6807_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6807_axis_0 = const()[name = string("op_6807_axis_0"), val = int32(-1)]; + tensor var_6807_0, tensor var_6807_1 = split(axis = var_6807_axis_0, split_sizes = var_6807_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_6807")]; + tensor hidden_states_103 = mul(x = var_6807_0, y = layers_10_post_feedforward_layernorm_weight)[name = string("hidden_states_103")]; + tensor hidden_states_105_cast_fp16 = add(x = x_213_cast_fp16, y = hidden_states_103)[name = string("hidden_states_105_cast_fp16")]; + tensor per_layer_slice_21_begin_0 = const()[name = string("per_layer_slice_21_begin_0"), val = tensor([0, 0, 2560])]; + tensor per_layer_slice_21_end_0 = const()[name = string("per_layer_slice_21_end_0"), val = tensor([1, 1, 2816])]; + tensor per_layer_slice_21_end_mask_0 = const()[name = string("per_layer_slice_21_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_21_cast_fp16 = slice_by_index(begin = per_layer_slice_21_begin_0, end = 
per_layer_slice_21_end_0, end_mask = per_layer_slice_21_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_21_cast_fp16")]; + tensor var_6835 = const()[name = string("op_6835"), val = tensor([0, 2, 1])]; + tensor input_325_axes_0 = const()[name = string("input_325_axes_0"), val = tensor([2])]; + tensor var_6836 = transpose(perm = var_6835, x = hidden_states_105_cast_fp16)[name = string("transpose_20")]; + tensor input_325 = expand_dims(axes = input_325_axes_0, x = var_6836)[name = string("input_325")]; + string gated_61_pad_type_0 = const()[name = string("gated_61_pad_type_0"), val = string("valid")]; + tensor gated_61_strides_0 = const()[name = string("gated_61_strides_0"), val = tensor([1, 1])]; + tensor gated_61_pad_0 = const()[name = string("gated_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_61_dilations_0 = const()[name = string("gated_61_dilations_0"), val = tensor([1, 1])]; + int32 gated_61_groups_0 = const()[name = string("gated_61_groups_0"), val = int32(1)]; + tensor gated_61 = conv(dilations = gated_61_dilations_0, groups = gated_61_groups_0, pad = gated_61_pad_0, pad_type = gated_61_pad_type_0, strides = gated_61_strides_0, weight = layers_10_per_layer_input_gate_weight_palettized, x = input_325)[name = string("gated_61")]; + string gated_63_mode_0 = const()[name = string("gated_63_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_63 = gelu(mode = gated_63_mode_0, x = gated_61)[name = string("gated_63")]; + tensor var_6855 = const()[name = string("op_6855"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_21_axes_0 = const()[name = string("per_layer_slice_conv_21_axes_0"), val = tensor([2])]; + tensor var_6856_cast_fp16 = transpose(perm = var_6855, x = per_layer_slice_21_cast_fp16)[name = string("transpose_19")]; + tensor per_layer_slice_conv_21_cast_fp16 = expand_dims(axes = per_layer_slice_conv_21_axes_0, x = var_6856_cast_fp16)[name = string("per_layer_slice_conv_21_cast_fp16")]; + tensor 
input_327_cast_fp16 = mul(x = gated_63, y = per_layer_slice_conv_21_cast_fp16)[name = string("input_327_cast_fp16")]; + string gated_65_pad_type_0 = const()[name = string("gated_65_pad_type_0"), val = string("valid")]; + tensor gated_65_strides_0 = const()[name = string("gated_65_strides_0"), val = tensor([1, 1])]; + tensor gated_65_pad_0 = const()[name = string("gated_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_65_dilations_0 = const()[name = string("gated_65_dilations_0"), val = tensor([1, 1])]; + int32 gated_65_groups_0 = const()[name = string("gated_65_groups_0"), val = int32(1)]; + tensor layers_10_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580038208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580365952))))[name = string("layers_10_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_65_cast_fp16 = conv(dilations = gated_65_dilations_0, groups = gated_65_groups_0, pad = gated_65_pad_0, pad_type = gated_65_pad_type_0, strides = gated_65_strides_0, weight = layers_10_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_327_cast_fp16)[name = string("gated_65_cast_fp16")]; + tensor var_6872_axes_0 = const()[name = string("op_6872_axes_0"), val = tensor([2])]; + tensor var_6872_cast_fp16 = squeeze(axes = var_6872_axes_0, x = gated_65_cast_fp16)[name = string("op_6872_cast_fp16")]; + tensor var_6876 = const()[name = string("op_6876"), val = tensor([0, 2, 1])]; + int32 var_6882 = const()[name = string("op_6882"), val = int32(-1)]; + fp16 const_129_promoted_to_fp16 = const()[name = string("const_129_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_217_cast_fp16 = transpose(perm = var_6876, x = var_6872_cast_fp16)[name = string("transpose_18")]; + tensor var_6884_cast_fp16 = mul(x = x_217_cast_fp16, y = const_129_promoted_to_fp16)[name = 
string("op_6884_cast_fp16")]; + bool input_329_interleave_0 = const()[name = string("input_329_interleave_0"), val = bool(false)]; + tensor input_329_cast_fp16 = concat(axis = var_6882, interleave = input_329_interleave_0, values = (x_217_cast_fp16, var_6884_cast_fp16))[name = string("input_329_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_6879_to_fp16 = const()[name = string("op_6879_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_6879_to_fp16, x = input_329_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor var_6889_split_sizes_0 = const()[name = string("op_6889_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6889_axis_0 = const()[name = string("op_6889_axis_0"), val = int32(-1)]; + tensor var_6889_cast_fp16_0, tensor var_6889_cast_fp16_1 = split(axis = var_6889_axis_0, split_sizes = var_6889_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_6889_cast_fp16")]; + tensor layers_10_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580368576)))]; + tensor hidden_states_109_cast_fp16 = mul(x = var_6889_cast_fp16_0, y = layers_10_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_109_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = hidden_states_109_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = tensor([0x1.3ep-1])]; + tensor x_219_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_130_promoted_to_fp16)[name = string("x_219_cast_fp16")]; + tensor var_6901_axes_0 = const()[name = string("op_6901_axes_0"), val = tensor([0])]; + tensor 
var_6901_cast_fp16 = squeeze(axes = var_6901_axes_0, x = K_sliding_out_cast_fp16)[name = string("op_6901_cast_fp16")]; + tensor var_6903_axes_0 = const()[name = string("op_6903_axes_0"), val = tensor([0])]; + tensor var_6903_cast_fp16 = squeeze(axes = var_6903_axes_0, x = V_sliding_out_cast_fp16)[name = string("op_6903_cast_fp16")]; + tensor var_6906_begin_0 = const()[name = string("op_6906_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_6906_end_0 = const()[name = string("op_6906_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_6906_end_mask_0 = const()[name = string("op_6906_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6906_squeeze_mask_0 = const()[name = string("op_6906_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6906_cast_fp16 = slice_by_index(begin = var_6906_begin_0, end = var_6906_end_0, end_mask = var_6906_end_mask_0, squeeze_mask = var_6906_squeeze_mask_0, x = K_full_in)[name = string("op_6906_cast_fp16")]; + tensor K_full_slot_axes_0 = const()[name = string("K_full_slot_axes_0"), val = tensor([0])]; + tensor K_full_slot_cast_fp16 = expand_dims(axes = K_full_slot_axes_0, x = var_6906_cast_fp16)[name = string("K_full_slot_cast_fp16")]; + tensor var_6911_begin_0 = const()[name = string("op_6911_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_6911_end_0 = const()[name = string("op_6911_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_6911_end_mask_0 = const()[name = string("op_6911_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6911_squeeze_mask_0 = const()[name = string("op_6911_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6911_cast_fp16 = slice_by_index(begin = var_6911_begin_0, end = var_6911_end_0, end_mask = var_6911_end_mask_0, squeeze_mask = var_6911_squeeze_mask_0, x = V_full_in)[name = string("op_6911_cast_fp16")]; + tensor V_full_slot_axes_0 = const()[name = string("V_full_slot_axes_0"), val = tensor([0])]; + tensor 
V_full_slot_cast_fp16 = expand_dims(axes = V_full_slot_axes_0, x = var_6911_cast_fp16)[name = string("V_full_slot_cast_fp16")]; + int32 var_6918 = const()[name = string("op_6918"), val = int32(-1)]; + fp16 const_131_promoted_to_fp16 = const()[name = string("const_131_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6920_cast_fp16 = mul(x = x_219_cast_fp16, y = const_131_promoted_to_fp16)[name = string("op_6920_cast_fp16")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331_cast_fp16 = concat(axis = var_6918, interleave = input_331_interleave_0, values = (x_219_cast_fp16, var_6920_cast_fp16))[name = string("input_331_cast_fp16")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_6915_to_fp16 = const()[name = string("op_6915_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_6915_to_fp16, x = input_331_cast_fp16)[name = string("normed_313_cast_fp16")]; + tensor var_6925_split_sizes_0 = const()[name = string("op_6925_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6925_axis_0 = const()[name = string("op_6925_axis_0"), val = int32(-1)]; + tensor var_6925_cast_fp16_0, tensor var_6925_cast_fp16_1 = split(axis = var_6925_axis_0, split_sizes = var_6925_split_sizes_0, x = normed_313_cast_fp16)[name = string("op_6925_cast_fp16")]; + tensor layers_11_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580373760)))]; + tensor h_67_cast_fp16 = mul(x = var_6925_cast_fp16_0, y = layers_11_input_layernorm_weight_promoted_to_fp16)[name = string("h_67_cast_fp16")]; + tensor var_6931 = const()[name = string("op_6931"), val = tensor([0, 2, 1])]; + tensor var_6934_axes_0 = const()[name = string("op_6934_axes_0"), val = 
tensor([2])]; + tensor var_6932_cast_fp16 = transpose(perm = var_6931, x = h_67_cast_fp16)[name = string("transpose_17")]; + tensor var_6934_cast_fp16 = expand_dims(axes = var_6934_axes_0, x = var_6932_cast_fp16)[name = string("op_6934_cast_fp16")]; + string var_6950_pad_type_0 = const()[name = string("op_6950_pad_type_0"), val = string("valid")]; + tensor var_6950_strides_0 = const()[name = string("op_6950_strides_0"), val = tensor([1, 1])]; + tensor var_6950_pad_0 = const()[name = string("op_6950_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6950_dilations_0 = const()[name = string("op_6950_dilations_0"), val = tensor([1, 1])]; + int32 var_6950_groups_0 = const()[name = string("op_6950_groups_0"), val = int32(1)]; + tensor var_6950 = conv(dilations = var_6950_dilations_0, groups = var_6950_groups_0, pad = var_6950_pad_0, pad_type = var_6950_pad_type_0, strides = var_6950_strides_0, weight = layers_11_self_attn_q_proj_weight_palettized, x = var_6934_cast_fp16)[name = string("op_6950")]; + tensor var_6955 = const()[name = string("op_6955"), val = tensor([1, 8, 512, 1])]; + tensor var_6956 = reshape(shape = var_6955, x = var_6950)[name = string("op_6956")]; + tensor var_6961 = const()[name = string("op_6961"), val = tensor([0, 1, 3, 2])]; + tensor var_6971 = const()[name = string("op_6971"), val = tensor([1, 8, 512])]; + tensor var_6962 = transpose(perm = var_6961, x = var_6956)[name = string("transpose_16")]; + tensor x_221 = reshape(shape = var_6971, x = var_6962)[name = string("x_221")]; + int32 var_6977 = const()[name = string("op_6977"), val = int32(-1)]; + fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; + tensor var_6979 = mul(x = x_221, y = const_132_promoted)[name = string("op_6979")]; + bool input_335_interleave_0 = const()[name = string("input_335_interleave_0"), val = bool(false)]; + tensor input_335 = concat(axis = var_6977, interleave = input_335_interleave_0, values = (x_221, var_6979))[name = 
string("input_335")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_6974_to_fp16 = const()[name = string("op_6974_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_6974_to_fp16, x = input_335)[name = string("normed_317_cast_fp16")]; + tensor var_6984_split_sizes_0 = const()[name = string("op_6984_split_sizes_0"), val = tensor([512, 512])]; + int32 var_6984_axis_0 = const()[name = string("op_6984_axis_0"), val = int32(-1)]; + tensor var_6984_0, tensor var_6984_1 = split(axis = var_6984_axis_0, split_sizes = var_6984_split_sizes_0, x = normed_317_cast_fp16)[name = string("op_6984")]; + tensor var_6986 = mul(x = var_6984_0, y = layers_11_self_attn_q_norm_weight)[name = string("op_6986")]; + tensor var_6991 = const()[name = string("op_6991"), val = tensor([1, 8, 1, 512])]; + tensor q_91 = reshape(shape = var_6991, x = var_6986)[name = string("q_91")]; + tensor var_6993_cast_fp16 = mul(x = q_91, y = cos_f)[name = string("op_6993_cast_fp16")]; + tensor var_6994_split_sizes_0 = const()[name = string("op_6994_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6994_axis_0 = const()[name = string("op_6994_axis_0"), val = int32(-1)]; + tensor var_6994_0, tensor var_6994_1 = split(axis = var_6994_axis_0, split_sizes = var_6994_split_sizes_0, x = q_91)[name = string("op_6994")]; + fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; + tensor var_6996 = mul(x = var_6994_1, y = const_133_promoted)[name = string("op_6996")]; + int32 var_6998 = const()[name = string("op_6998"), val = int32(-1)]; + bool var_6999_interleave_0 = const()[name = string("op_6999_interleave_0"), val = bool(false)]; + tensor var_6999 = concat(axis = var_6998, interleave = var_6999_interleave_0, values = (var_6996, var_6994_0))[name = string("op_6999")]; + tensor var_7000_cast_fp16 = mul(x = var_6999, y = sin_f)[name = 
string("op_7000_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_6993_cast_fp16, y = var_7000_cast_fp16)[name = string("q_cast_fp16")]; + string var_7013_pad_type_0 = const()[name = string("op_7013_pad_type_0"), val = string("valid")]; + tensor var_7013_strides_0 = const()[name = string("op_7013_strides_0"), val = tensor([1, 1])]; + tensor var_7013_pad_0 = const()[name = string("op_7013_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7013_dilations_0 = const()[name = string("op_7013_dilations_0"), val = tensor([1, 1])]; + int32 var_7013_groups_0 = const()[name = string("op_7013_groups_0"), val = int32(1)]; + tensor var_7013 = conv(dilations = var_7013_dilations_0, groups = var_7013_groups_0, pad = var_7013_pad_0, pad_type = var_7013_pad_type_0, strides = var_7013_strides_0, weight = layers_11_self_attn_k_proj_weight_palettized, x = var_6934_cast_fp16)[name = string("op_7013")]; + tensor var_7018 = const()[name = string("op_7018"), val = tensor([1, 2, 512, 1])]; + tensor var_7019 = reshape(shape = var_7018, x = var_7013)[name = string("op_7019")]; + tensor var_7024 = const()[name = string("op_7024"), val = tensor([0, 1, 3, 2])]; + string var_7041_pad_type_0 = const()[name = string("op_7041_pad_type_0"), val = string("valid")]; + tensor var_7041_strides_0 = const()[name = string("op_7041_strides_0"), val = tensor([1, 1])]; + tensor var_7041_pad_0 = const()[name = string("op_7041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7041_dilations_0 = const()[name = string("op_7041_dilations_0"), val = tensor([1, 1])]; + int32 var_7041_groups_0 = const()[name = string("op_7041_groups_0"), val = int32(1)]; + tensor var_7041 = conv(dilations = var_7041_dilations_0, groups = var_7041_groups_0, pad = var_7041_pad_0, pad_type = var_7041_pad_type_0, strides = var_7041_strides_0, weight = layers_11_self_attn_v_proj_weight_palettized, x = var_6934_cast_fp16)[name = string("op_7041")]; + tensor var_7046 = const()[name = string("op_7046"), val = tensor([1, 2, 512, 1])]; + 
tensor var_7047 = reshape(shape = var_7046, x = var_7041)[name = string("op_7047")]; + tensor var_7052 = const()[name = string("op_7052"), val = tensor([0, 1, 3, 2])]; + tensor var_7062 = const()[name = string("op_7062"), val = tensor([1, 2, 512])]; + tensor var_7025 = transpose(perm = var_7024, x = var_7019)[name = string("transpose_15")]; + tensor x_223 = reshape(shape = var_7062, x = var_7025)[name = string("x_223")]; + int32 var_7068 = const()[name = string("op_7068"), val = int32(-1)]; + fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; + tensor var_7070 = mul(x = x_223, y = const_134_promoted)[name = string("op_7070")]; + bool input_337_interleave_0 = const()[name = string("input_337_interleave_0"), val = bool(false)]; + tensor input_337 = concat(axis = var_7068, interleave = input_337_interleave_0, values = (x_223, var_7070))[name = string("input_337")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_7065_to_fp16 = const()[name = string("op_7065_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_7065_to_fp16, x = input_337)[name = string("normed_321_cast_fp16")]; + tensor var_7075_split_sizes_0 = const()[name = string("op_7075_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7075_axis_0 = const()[name = string("op_7075_axis_0"), val = int32(-1)]; + tensor var_7075_0, tensor var_7075_1 = split(axis = var_7075_axis_0, split_sizes = var_7075_split_sizes_0, x = normed_321_cast_fp16)[name = string("op_7075")]; + tensor var_7077 = mul(x = var_7075_0, y = layers_11_self_attn_k_norm_weight)[name = string("op_7077")]; + tensor var_7082 = const()[name = string("op_7082"), val = tensor([1, 2, 1, 512])]; + tensor q_93 = reshape(shape = var_7082, x = var_7077)[name = string("q_93")]; + fp16 var_7084_promoted = const()[name = string("op_7084_promoted"), val = fp16(0x1p+1)]; + tensor var_7053 = 
transpose(perm = var_7052, x = var_7047)[name = string("transpose_14")]; + tensor var_7085 = pow(x = var_7053, y = var_7084_promoted)[name = string("op_7085")]; + tensor var_7090_axes_0 = const()[name = string("op_7090_axes_0"), val = tensor([-1])]; + bool var_7090_keep_dims_0 = const()[name = string("op_7090_keep_dims_0"), val = bool(true)]; + tensor var_7090 = reduce_mean(axes = var_7090_axes_0, keep_dims = var_7090_keep_dims_0, x = var_7085)[name = string("op_7090")]; + fp16 var_7092_to_fp16 = const()[name = string("op_7092_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_cast_fp16 = add(x = var_7090, y = var_7092_to_fp16)[name = string("mean_sq_cast_fp16")]; + fp32 var_7094_epsilon_0 = const()[name = string("op_7094_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7094_cast_fp16 = rsqrt(epsilon = var_7094_epsilon_0, x = mean_sq_cast_fp16)[name = string("op_7094_cast_fp16")]; + tensor v_cast_fp16 = mul(x = var_7053, y = var_7094_cast_fp16)[name = string("v_cast_fp16")]; + tensor var_7096_cast_fp16 = mul(x = q_93, y = cos_f)[name = string("op_7096_cast_fp16")]; + tensor var_7097_split_sizes_0 = const()[name = string("op_7097_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7097_axis_0 = const()[name = string("op_7097_axis_0"), val = int32(-1)]; + tensor var_7097_0, tensor var_7097_1 = split(axis = var_7097_axis_0, split_sizes = var_7097_split_sizes_0, x = q_93)[name = string("op_7097")]; + fp16 const_135_promoted = const()[name = string("const_135_promoted"), val = fp16(-0x1p+0)]; + tensor var_7099 = mul(x = var_7097_1, y = const_135_promoted)[name = string("op_7099")]; + int32 var_7101 = const()[name = string("op_7101"), val = int32(-1)]; + bool var_7102_interleave_0 = const()[name = string("op_7102_interleave_0"), val = bool(false)]; + tensor var_7102 = concat(axis = var_7101, interleave = var_7102_interleave_0, values = (var_7099, var_7097_0))[name = string("op_7102")]; + tensor var_7103_cast_fp16 = mul(x = var_7102, y = sin_f)[name = 
string("op_7103_cast_fp16")]; + tensor k_cast_fp16 = add(x = var_7096_cast_fp16, y = var_7103_cast_fp16)[name = string("k_cast_fp16")]; + tensor var_7109_cast_fp16 = mul(x = K_full_slot_cast_fp16, y = var_3796_cast_fp16)[name = string("op_7109_cast_fp16")]; + tensor var_7110_reps_0 = const()[name = string("op_7110_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_7110_cast_fp16 = tile(reps = var_7110_reps_0, x = k_cast_fp16)[name = string("op_7110_cast_fp16")]; + tensor var_7111_cast_fp16 = mul(x = var_7110_cast_fp16, y = update_mask)[name = string("op_7111_cast_fp16")]; + tensor K_full_out_cast_fp16 = add(x = var_7109_cast_fp16, y = var_7111_cast_fp16)[name = string("K_full_out_cast_fp16")]; + tensor var_7117_cast_fp16 = mul(x = V_full_slot_cast_fp16, y = var_3796_cast_fp16)[name = string("op_7117_cast_fp16")]; + tensor var_7118_reps_0 = const()[name = string("op_7118_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_7118_cast_fp16 = tile(reps = var_7118_reps_0, x = v_cast_fp16)[name = string("op_7118_cast_fp16")]; + tensor var_7119_cast_fp16 = mul(x = var_7118_cast_fp16, y = update_mask)[name = string("op_7119_cast_fp16")]; + tensor V_full_out_cast_fp16 = add(x = var_7117_cast_fp16, y = var_7119_cast_fp16)[name = string("V_full_out_cast_fp16")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_22_reps_0 = const()[name = string("tile_22_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = K_full_out_cast_fp16)[name = string("transpose_13")]; + tensor tile_22_cast_fp16 = tile(reps = tile_22_reps_0, x = transpose_44_cast_fp16)[name = string("tile_22_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_44, x = tile_22_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor transpose_45_perm_0 = const()[name = 
string("transpose_45_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_12")]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_45, x = transpose_45_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_23_reps_0 = const()[name = string("tile_23_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_46_cast_fp16 = transpose(perm = transpose_46_perm_0, x = V_full_out_cast_fp16)[name = string("transpose_11")]; + tensor tile_23_cast_fp16 = tile(reps = tile_23_reps_0, x = transpose_46_cast_fp16)[name = string("tile_23_cast_fp16")]; + tensor concat_46 = const()[name = string("concat_46"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_46, x = tile_23_cast_fp16)[name = string("reshape_46_cast_fp16")]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_46_cast_fp16)[name = string("transpose_10")]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_47, x = transpose_47_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor V_expanded_perm_0 = const()[name = string("V_expanded_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_45_transpose_x_0 = const()[name = string("attn_weights_45_transpose_x_0"), val = bool(false)]; + bool attn_weights_45_transpose_y_0 = const()[name = string("attn_weights_45_transpose_y_0"), val = bool(false)]; + tensor 
transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = reshape_45_cast_fp16)[name = string("transpose_9")]; + tensor attn_weights_45_cast_fp16 = matmul(transpose_x = attn_weights_45_transpose_x_0, transpose_y = attn_weights_45_transpose_y_0, x = q_cast_fp16, y = transpose_59_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor x_227_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask_full)[name = string("x_227_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor reduce_max_11 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_227_cast_fp16)[name = string("reduce_max_11")]; + tensor var_7161 = sub(x = x_227_cast_fp16, y = reduce_max_11)[name = string("op_7161")]; + tensor var_7167 = exp(x = var_7161)[name = string("op_7167")]; + tensor var_7177_axes_0 = const()[name = string("op_7177_axes_0"), val = tensor([-1])]; + bool var_7177_keep_dims_0 = const()[name = string("op_7177_keep_dims_0"), val = bool(true)]; + tensor var_7177 = reduce_sum(axes = var_7177_axes_0, keep_dims = var_7177_keep_dims_0, x = var_7167)[name = string("op_7177")]; + tensor var_7183_cast_fp16 = real_div(x = var_7167, y = var_7177)[name = string("op_7183_cast_fp16")]; + bool attn_output_67_transpose_x_0 = const()[name = string("attn_output_67_transpose_x_0"), val = bool(false)]; + bool attn_output_67_transpose_y_0 = const()[name = string("attn_output_67_transpose_y_0"), val = bool(false)]; + tensor V_expanded_cast_fp16 = transpose(perm = V_expanded_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_8")]; + tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_0, transpose_y = attn_output_67_transpose_y_0, x = var_7183_cast_fp16, y = V_expanded_cast_fp16)[name = string("attn_output_67_cast_fp16")]; + tensor var_7194 = 
const()[name = string("op_7194"), val = tensor([0, 2, 1, 3])]; + tensor var_7201 = const()[name = string("op_7201"), val = tensor([1, 1, -1])]; + tensor var_7195_cast_fp16 = transpose(perm = var_7194, x = attn_output_67_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_69_cast_fp16 = reshape(shape = var_7201, x = var_7195_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_7206 = const()[name = string("op_7206"), val = tensor([0, 2, 1])]; + string var_7222_pad_type_0 = const()[name = string("op_7222_pad_type_0"), val = string("valid")]; + int32 var_7222_groups_0 = const()[name = string("op_7222_groups_0"), val = int32(1)]; + tensor var_7222_strides_0 = const()[name = string("op_7222_strides_0"), val = tensor([1])]; + tensor var_7222_pad_0 = const()[name = string("op_7222_pad_0"), val = tensor([0, 0])]; + tensor var_7222_dilations_0 = const()[name = string("op_7222_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580378944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585621888))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7207_cast_fp16 = transpose(perm = var_7206, x = attn_output_69_cast_fp16)[name = string("transpose_6")]; + tensor var_7222_cast_fp16 = conv(dilations = var_7222_dilations_0, groups = var_7222_groups_0, pad = var_7222_pad_0, pad_type = var_7222_pad_type_0, strides = var_7222_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7207_cast_fp16)[name = string("op_7222_cast_fp16")]; + tensor var_7226 = const()[name = string("op_7226"), val = tensor([0, 2, 1])]; + int32 var_7232 = const()[name = string("op_7232"), val = int32(-1)]; + fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor 
x_231_cast_fp16 = transpose(perm = var_7226, x = var_7222_cast_fp16)[name = string("transpose_5")]; + tensor var_7234_cast_fp16 = mul(x = x_231_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_7234_cast_fp16")]; + bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; + tensor input_341_cast_fp16 = concat(axis = var_7232, interleave = input_341_interleave_0, values = (x_231_cast_fp16, var_7234_cast_fp16))[name = string("input_341_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_7229_to_fp16 = const()[name = string("op_7229_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_7229_to_fp16, x = input_341_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor var_7239_split_sizes_0 = const()[name = string("op_7239_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7239_axis_0 = const()[name = string("op_7239_axis_0"), val = int32(-1)]; + tensor var_7239_cast_fp16_0, tensor var_7239_cast_fp16_1 = split(axis = var_7239_axis_0, split_sizes = var_7239_split_sizes_0, x = normed_325_cast_fp16)[name = string("op_7239_cast_fp16")]; + tensor layers_11_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585624512)))]; + tensor attn_output_cast_fp16 = mul(x = var_7239_cast_fp16_0, y = layers_11_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_233_cast_fp16 = add(x = x_219_cast_fp16, y = attn_output_cast_fp16)[name = string("x_233_cast_fp16")]; + int32 var_7248 = const()[name = string("op_7248"), val = int32(-1)]; + fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7250_cast_fp16 = mul(x = 
x_233_cast_fp16, y = const_137_promoted_to_fp16)[name = string("op_7250_cast_fp16")]; + bool input_343_interleave_0 = const()[name = string("input_343_interleave_0"), val = bool(false)]; + tensor input_343_cast_fp16 = concat(axis = var_7248, interleave = input_343_interleave_0, values = (x_233_cast_fp16, var_7250_cast_fp16))[name = string("input_343_cast_fp16")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_7245_to_fp16 = const()[name = string("op_7245_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_7245_to_fp16, x = input_343_cast_fp16)[name = string("normed_329_cast_fp16")]; + tensor var_7255_split_sizes_0 = const()[name = string("op_7255_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7255_axis_0 = const()[name = string("op_7255_axis_0"), val = int32(-1)]; + tensor var_7255_cast_fp16_0, tensor var_7255_cast_fp16_1 = split(axis = var_7255_axis_0, split_sizes = var_7255_split_sizes_0, x = normed_329_cast_fp16)[name = string("op_7255_cast_fp16")]; + tensor layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585629696)))]; + tensor h_69_cast_fp16 = mul(x = var_7255_cast_fp16_0, y = layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_69_cast_fp16")]; + tensor var_7266 = const()[name = string("op_7266"), val = tensor([0, 2, 1])]; + tensor input_345_axes_0 = const()[name = string("input_345_axes_0"), val = tensor([2])]; + tensor var_7267 = transpose(perm = var_7266, x = h_69_cast_fp16)[name = string("transpose_4")]; + tensor input_345 = expand_dims(axes = input_345_axes_0, x = var_7267)[name = string("input_345")]; + string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; + tensor 
gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; + tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; + int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; + tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight_palettized, x = input_345)[name = string("gate_45")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_11_mlp_up_proj_weight_palettized, x = input_345)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_45)[name = string("gate")]; + tensor input_347 = mul(x = gate, y = up)[name = string("input_347")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; 
+ tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_11_mlp_down_proj_weight_palettized, x = input_347)[name = string("mlp_out")]; + tensor var_7307_axes_0 = const()[name = string("op_7307_axes_0"), val = tensor([2])]; + tensor var_7307 = squeeze(axes = var_7307_axes_0, x = mlp_out)[name = string("op_7307")]; + tensor var_7311 = const()[name = string("op_7311"), val = tensor([0, 2, 1])]; + int32 var_7317 = const()[name = string("op_7317"), val = int32(-1)]; + fp16 const_138_promoted = const()[name = string("const_138_promoted"), val = fp16(-0x1p+0)]; + tensor x_235 = transpose(perm = var_7311, x = var_7307)[name = string("transpose_3")]; + tensor var_7319 = mul(x = x_235, y = const_138_promoted)[name = string("op_7319")]; + bool input_349_interleave_0 = const()[name = string("input_349_interleave_0"), val = bool(false)]; + tensor input_349 = concat(axis = var_7317, interleave = input_349_interleave_0, values = (x_235, var_7319))[name = string("input_349")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_7314_to_fp16 = const()[name = string("op_7314_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_7314_to_fp16, x = input_349)[name = string("normed_333_cast_fp16")]; + tensor var_7324_split_sizes_0 = const()[name = string("op_7324_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7324_axis_0 = const()[name = string("op_7324_axis_0"), val = int32(-1)]; + tensor var_7324_0, tensor var_7324_1 = split(axis = var_7324_axis_0, split_sizes = var_7324_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_7324")]; + tensor hidden_states_113 = mul(x = var_7324_0, y = layers_11_post_feedforward_layernorm_weight)[name = string("hidden_states_113")]; + tensor hidden_states_115_cast_fp16 = add(x = x_233_cast_fp16, y = 
hidden_states_113)[name = string("hidden_states_115_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 2816])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 3072])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_cast_fp16")]; + tensor var_7352 = const()[name = string("op_7352"), val = tensor([0, 2, 1])]; + tensor input_351_axes_0 = const()[name = string("input_351_axes_0"), val = tensor([2])]; + tensor var_7353 = transpose(perm = var_7352, x = hidden_states_115_cast_fp16)[name = string("transpose_2")]; + tensor input_351 = expand_dims(axes = input_351_axes_0, x = var_7353)[name = string("input_351")]; + string gated_67_pad_type_0 = const()[name = string("gated_67_pad_type_0"), val = string("valid")]; + tensor gated_67_strides_0 = const()[name = string("gated_67_strides_0"), val = tensor([1, 1])]; + tensor gated_67_pad_0 = const()[name = string("gated_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_67_dilations_0 = const()[name = string("gated_67_dilations_0"), val = tensor([1, 1])]; + int32 gated_67_groups_0 = const()[name = string("gated_67_groups_0"), val = int32(1)]; + tensor gated_67 = conv(dilations = gated_67_dilations_0, groups = gated_67_groups_0, pad = gated_67_pad_0, pad_type = gated_67_pad_type_0, strides = gated_67_strides_0, weight = layers_11_per_layer_input_gate_weight_palettized, x = input_351)[name = string("gated_67")]; + string gated_69_mode_0 = const()[name = string("gated_69_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_69 = gelu(mode = gated_69_mode_0, x = gated_67)[name = string("gated_69")]; + tensor var_7372 
= const()[name = string("op_7372"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_7373_cast_fp16 = transpose(perm = var_7372, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_7373_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_353_cast_fp16 = mul(x = gated_69, y = per_layer_slice_conv_cast_fp16)[name = string("input_353_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_11_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585634880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585962624))))[name = string("layers_11_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_11_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_353_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_7389_axes_0 = const()[name = string("op_7389_axes_0"), val = tensor([2])]; + tensor var_7389_cast_fp16 = squeeze(axes = var_7389_axes_0, x = gated_cast_fp16)[name = string("op_7389_cast_fp16")]; + tensor var_7393 = const()[name = string("op_7393"), val = 
tensor([0, 2, 1])]; + int32 var_7399 = const()[name = string("op_7399"), val = int32(-1)]; + fp16 const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_7393, x = var_7389_cast_fp16)[name = string("transpose_0")]; + tensor var_7401_cast_fp16 = mul(x = x_cast_fp16, y = const_139_promoted_to_fp16)[name = string("op_7401_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_7399, interleave = input_interleave_0, values = (x_cast_fp16, var_7401_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_7396_to_fp16 = const()[name = string("op_7396_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_7396_to_fp16, x = input_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor var_7406_split_sizes_0 = const()[name = string("op_7406_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7406_axis_0 = const()[name = string("op_7406_axis_0"), val = int32(-1)]; + tensor var_7406_cast_fp16_0, tensor var_7406_cast_fp16_1 = split(axis = var_7406_axis_0, split_sizes = var_7406_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_7406_cast_fp16")]; + tensor layers_11_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585965248)))]; + tensor hidden_states_119_cast_fp16 = mul(x = var_7406_cast_fp16_0, y = layers_11_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = hidden_states_119_cast_fp16)[name = 
string("hidden_states_cast_fp16")]; + tensor const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = tensor([0x1.0ap-1])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_7416_cast_fp16")]; + tensor var_7418_axes_0 = const()[name = string("op_7418_axes_0"), val = tensor([0])]; + tensor var_7418_cast_fp16 = squeeze(axes = var_7418_axes_0, x = K_full_out_cast_fp16)[name = string("op_7418_cast_fp16")]; + tensor var_7420_axes_0 = const()[name = string("op_7420_axes_0"), val = tensor([0])]; + tensor var_7420_cast_fp16 = squeeze(axes = var_7420_axes_0, x = V_full_out_cast_fp16)[name = string("op_7420_cast_fp16")]; + int32 var_7423_axis_0 = const()[name = string("op_7423_axis_0"), val = int32(0)]; + tensor K_sliding_out = stack(axis = var_7423_axis_0, values = (var_1353_cast_fp16, var_1912_cast_fp16, var_2471_cast_fp16, var_3030_cast_fp16, var_3589_cast_fp16, var_4665_cast_fp16, var_5224_cast_fp16, var_5783_cast_fp16, var_6342_cast_fp16, var_6901_cast_fp16))[name = string("op_7423_cast_fp16")]; + int32 var_7426_axis_0 = const()[name = string("op_7426_axis_0"), val = int32(0)]; + tensor V_sliding_out = stack(axis = var_7426_axis_0, values = (var_1355_cast_fp16, var_1914_cast_fp16, var_2473_cast_fp16, var_3032_cast_fp16, var_3591_cast_fp16, var_4667_cast_fp16, var_5226_cast_fp16, var_5785_cast_fp16, var_6344_cast_fp16, var_6903_cast_fp16))[name = string("op_7426_cast_fp16")]; + int32 var_7429_axis_0 = const()[name = string("op_7429_axis_0"), val = int32(0)]; + tensor K_full_out = stack(axis = var_7429_axis_0, values = (var_4106_cast_fp16, var_7418_cast_fp16))[name = string("op_7429_cast_fp16")]; + int32 var_7432_axis_0 = const()[name = string("op_7432_axis_0"), val = int32(0)]; + tensor V_full_out = stack(axis = var_7432_axis_0, values = (var_4108_cast_fp16, var_7420_cast_fp16))[name = string("op_7432_cast_fp16")]; + } -> (hidden_states_out, K_sliding_out, V_sliding_out, 
K_full_out, V_full_out, per_layer_combined_out); + func verify_qK(tensor K_full_in, tensor K_sliding_in, tensor V_full_in, tensor V_sliding_in, tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor per_layer_raw, tensor sin_f, tensor sin_s, tensor update_indicator) { + tensor per_layer_model_projection_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13762688))))[name = string("per_layer_model_projection_weight_palettized")]; + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13773504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16395008))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16397120)))]; + tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16397696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17053120))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; + tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17053696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17709120))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; + tensor layers_0_self_attn_k_norm_weight = const()[name = 
string("layers_0_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17709696)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17710272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30817536))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30827840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43935104))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43945408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57052672))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57055296)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57060480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57388224))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57388544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(60010048))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_self_attn_q_norm_weight = const()[name = string("layers_1_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60012160)))]; + tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60012736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60668160))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; + tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60668736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61324160))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; + tensor layers_1_self_attn_k_norm_weight = const()[name = string("layers_1_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61324736)))]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61325312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74432576))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74442880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87550144))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(87560448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100667712))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100670336)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100675520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101003264))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101003584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103625088))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103627200)))]; + tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103627776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104283200))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; + tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104283776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104939200))))[name 
= string("layers_2_self_attn_v_proj_weight_palettized")]; + tensor layers_2_self_attn_k_norm_weight = const()[name = string("layers_2_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104939776)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104940352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118047616))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118057920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131165184))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131175488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144282752))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144285376)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144290560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144618304))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(144618624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147240128))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_self_attn_q_norm_weight = const()[name = string("layers_3_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147242240)))]; + tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147242816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147898240))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; + tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147898816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148554240))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; + tensor layers_3_self_attn_k_norm_weight = const()[name = string("layers_3_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148554816)))]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148555392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161662656))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161672960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174780224))))[name = 
string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174790528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187897792))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187900416)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187905600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188233344))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188233664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190855168))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190857280)))]; + tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190857856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191513280))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; + tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(191513856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192169280))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; + tensor layers_4_self_attn_k_norm_weight = const()[name = string("layers_4_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192169856)))]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192170432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205277696))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205288000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218395264))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218405568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231512832))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231515456)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231520640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231848384))))[name = 
string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231848704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237091648))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_self_attn_q_norm_weight = const()[name = string("layers_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237095808)))]; + tensor layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237096896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238407680))))[name = string("layers_5_self_attn_k_proj_weight_palettized")]; + tensor layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238408768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239719552))))[name = string("layers_5_self_attn_v_proj_weight_palettized")]; + tensor layers_5_self_attn_k_norm_weight = const()[name = string("layers_5_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239720640)))]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239721728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252828992))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(252839296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265946560))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265956864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279064128))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279066752)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279071936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279399680))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279400000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282021504))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282023616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282679040))))[name = string("layers_6_self_attn_k_proj_weight_palettized")]; + tensor layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282679616))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283335040))))[name = string("layers_6_self_attn_v_proj_weight_palettized")]; + tensor layers_6_self_attn_k_norm_weight = const()[name = string("layers_6_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283335616)))]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283336192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296443456))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296453760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309561024))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309571328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322678592))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322681216)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322686400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323014144))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor 
layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323014464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325635968))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_self_attn_q_norm_weight = const()[name = string("layers_7_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325638080)))]; + tensor layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325638656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326294080))))[name = string("layers_7_self_attn_k_proj_weight_palettized")]; + tensor layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326294656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326950080))))[name = string("layers_7_self_attn_v_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326950656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340057920))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340068224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353175488))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353185792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366293056))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366295680)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366300864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366628608))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366628928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369250432))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_self_attn_q_norm_weight = const()[name = string("layers_8_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369252544)))]; + tensor layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369253120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369908544))))[name = string("layers_8_self_attn_k_proj_weight_palettized")]; + tensor layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369909120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(370564544))))[name = string("layers_8_self_attn_v_proj_weight_palettized")]; + tensor layers_8_self_attn_k_norm_weight = const()[name = string("layers_8_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370565120)))]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370565696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383672960))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383683264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396790528))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396800832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409908096))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409910720)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409915904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410243648))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + tensor layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(410243968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412865472))))[name = string("layers_9_self_attn_q_proj_weight_palettized")]; + tensor layers_9_self_attn_q_norm_weight = const()[name = string("layers_9_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412867584)))]; + tensor layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412868160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413523584))))[name = string("layers_9_self_attn_k_proj_weight_palettized")]; + tensor layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413524160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414179584))))[name = string("layers_9_self_attn_v_proj_weight_palettized")]; + tensor layers_9_self_attn_k_norm_weight = const()[name = string("layers_9_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414180160)))]; + tensor layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414180736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427288000))))[name = string("layers_9_mlp_gate_proj_weight_palettized")]; + tensor layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427298304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440405568))))[name = 
string("layers_9_mlp_up_proj_weight_palettized")]; + tensor layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440415872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453523136))))[name = string("layers_9_mlp_down_proj_weight_palettized")]; + tensor layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453525760)))]; + tensor layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453530944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453858688))))[name = string("layers_9_per_layer_input_gate_weight_palettized")]; + tensor layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453859008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456480512))))[name = string("layers_10_self_attn_q_proj_weight_palettized")]; + tensor layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(456482624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457138048))))[name = string("layers_10_self_attn_k_proj_weight_palettized")]; + tensor layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457138624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457794048))))[name = 
string("layers_10_self_attn_v_proj_weight_palettized")]; + tensor layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457794624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470901888))))[name = string("layers_10_mlp_gate_proj_weight_palettized")]; + tensor layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470912192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484019456))))[name = string("layers_10_mlp_up_proj_weight_palettized")]; + tensor layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484029760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497137024))))[name = string("layers_10_mlp_down_proj_weight_palettized")]; + tensor layers_10_post_feedforward_layernorm_weight = const()[name = string("layers_10_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497139648)))]; + tensor layers_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497144832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497472576))))[name = string("layers_10_per_layer_input_gate_weight_palettized")]; + tensor layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497472896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502715840))))[name = 
string("layers_11_self_attn_q_proj_weight_palettized")]; + tensor layers_11_self_attn_q_norm_weight = const()[name = string("layers_11_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502720000)))]; + tensor layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(502721088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504031872))))[name = string("layers_11_self_attn_k_proj_weight_palettized")]; + tensor layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504032960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505343744))))[name = string("layers_11_self_attn_v_proj_weight_palettized")]; + tensor layers_11_self_attn_k_norm_weight = const()[name = string("layers_11_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505344832)))]; + tensor layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505345920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518453184))))[name = string("layers_11_mlp_gate_proj_weight_palettized")]; + tensor layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518463488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531570752))))[name = string("layers_11_mlp_up_proj_weight_palettized")]; + tensor layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(531581056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544688320))))[name = string("layers_11_mlp_down_proj_weight_palettized")]; + tensor layers_11_post_feedforward_layernorm_weight = const()[name = string("layers_11_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544690944)))]; + tensor layers_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544696128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545023872))))[name = string("layers_11_per_layer_input_gate_weight_palettized")]; + tensor var_740 = const()[name = string("op_740"), val = tensor([0, 2, 1])]; + tensor var_743_axes_0 = const()[name = string("op_743_axes_0"), val = tensor([2])]; + tensor var_741_cast_fp16 = transpose(perm = var_740, x = hidden_states)[name = string("transpose_241")]; + tensor var_743_cast_fp16 = expand_dims(axes = var_743_axes_0, x = var_741_cast_fp16)[name = string("op_743_cast_fp16")]; + string var_759_pad_type_0 = const()[name = string("op_759_pad_type_0"), val = string("valid")]; + tensor var_759_strides_0 = const()[name = string("op_759_strides_0"), val = tensor([1, 1])]; + tensor var_759_pad_0 = const()[name = string("op_759_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_759_dilations_0 = const()[name = string("op_759_dilations_0"), val = tensor([1, 1])]; + int32 var_759_groups_0 = const()[name = string("op_759_groups_0"), val = int32(1)]; + tensor var_759 = conv(dilations = var_759_dilations_0, groups = var_759_groups_0, pad = var_759_pad_0, pad_type = var_759_pad_type_0, strides = var_759_strides_0, weight = per_layer_model_projection_weight_palettized, x = var_743_cast_fp16)[name = string("op_759")]; + fp16 var_760_to_fp16 = const()[name = 
string("op_760_to_fp16"), val = fp16(0x1.43cp-6)]; + tensor proj_1_cast_fp16 = mul(x = var_759, y = var_760_to_fp16)[name = string("proj_1_cast_fp16")]; + tensor var_763_axes_0 = const()[name = string("op_763_axes_0"), val = tensor([2])]; + tensor var_763_cast_fp16 = squeeze(axes = var_763_axes_0, x = proj_1_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor var_767 = const()[name = string("op_767"), val = tensor([0, 2, 1])]; + tensor var_774 = const()[name = string("op_774"), val = tensor([3, 42, 256])]; + tensor proj_3_cast_fp16 = transpose(perm = var_767, x = var_763_cast_fp16)[name = string("transpose_240")]; + tensor proj_grouped_cast_fp16 = reshape(shape = var_774, x = proj_3_cast_fp16)[name = string("proj_grouped_cast_fp16")]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_776_cast_fp16 = mul(x = proj_grouped_cast_fp16, y = const_0_promoted_to_fp16)[name = string("op_776_cast_fp16")]; + int32 var_778 = const()[name = string("op_778"), val = int32(-1)]; + bool input_3_interleave_0 = const()[name = string("input_3_interleave_0"), val = bool(false)]; + tensor input_3_cast_fp16 = concat(axis = var_778, interleave = input_3_interleave_0, values = (proj_grouped_cast_fp16, var_776_cast_fp16))[name = string("input_3_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_784_to_fp16 = const()[name = string("op_784_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_784_to_fp16, x = input_3_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_787_split_sizes_0 = const()[name = string("op_787_split_sizes_0"), val = tensor([256, 256])]; + int32 var_787_axis_0 = const()[name = string("op_787_axis_0"), val = int32(-1)]; + tensor var_787_cast_fp16_0, tensor var_787_cast_fp16_1 = split(axis = var_787_axis_0, split_sizes = var_787_split_sizes_0, x = 
normed_1_cast_fp16)[name = string("op_787_cast_fp16")]; + tensor per_layer_projection_norm_weight_promoted_to_fp16 = const()[name = string("per_layer_projection_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545024192)))]; + tensor var_789_cast_fp16 = mul(x = var_787_cast_fp16_0, y = per_layer_projection_norm_weight_promoted_to_fp16)[name = string("op_789_cast_fp16")]; + tensor var_793 = const()[name = string("op_793"), val = tensor([1, 3, 10752])]; + tensor proj_normed_cast_fp16 = reshape(shape = var_793, x = var_789_cast_fp16)[name = string("proj_normed_cast_fp16")]; + tensor var_796_cast_fp16 = add(x = proj_normed_cast_fp16, y = per_layer_raw)[name = string("op_796_cast_fp16")]; + fp16 var_797_to_fp16 = const()[name = string("op_797_to_fp16"), val = fp16(0x1.6ap-1)]; + tensor per_layer_combined_out = mul(x = var_796_cast_fp16, y = var_797_to_fp16)[name = string("per_layer_combined_cast_fp16")]; + int32 var_803 = const()[name = string("op_803"), val = int32(-1)]; + fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_805_cast_fp16 = mul(x = hidden_states, y = const_1_promoted_to_fp16)[name = string("op_805_cast_fp16")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5_cast_fp16 = concat(axis = var_803, interleave = input_5_interleave_0, values = (hidden_states, var_805_cast_fp16))[name = string("input_5_cast_fp16")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_800_to_fp16 = const()[name = string("op_800_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_800_to_fp16, x = input_5_cast_fp16)[name = string("normed_5_cast_fp16")]; + tensor var_810_split_sizes_0 = const()[name = string("op_810_split_sizes_0"), val = tensor([2560, 2560])]; + int32 
var_810_axis_0 = const()[name = string("op_810_axis_0"), val = int32(-1)]; + tensor var_810_cast_fp16_0, tensor var_810_cast_fp16_1 = split(axis = var_810_axis_0, split_sizes = var_810_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_810_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545024768)))]; + tensor h_1_cast_fp16 = mul(x = var_810_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_816 = const()[name = string("op_816"), val = tensor([0, 2, 1])]; + tensor var_819_axes_0 = const()[name = string("op_819_axes_0"), val = tensor([2])]; + tensor var_817_cast_fp16 = transpose(perm = var_816, x = h_1_cast_fp16)[name = string("transpose_239")]; + tensor var_819_cast_fp16 = expand_dims(axes = var_819_axes_0, x = var_817_cast_fp16)[name = string("op_819_cast_fp16")]; + string q_1_pad_type_0 = const()[name = string("q_1_pad_type_0"), val = string("valid")]; + tensor q_1_strides_0 = const()[name = string("q_1_strides_0"), val = tensor([1, 1])]; + tensor q_1_pad_0 = const()[name = string("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_1_dilations_0 = const()[name = string("q_1_dilations_0"), val = tensor([1, 1])]; + int32 q_1_groups_0 = const()[name = string("q_1_groups_0"), val = int32(1)]; + tensor q_1 = conv(dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_819_cast_fp16)[name = string("q_1")]; + tensor var_840 = const()[name = string("op_840"), val = tensor([1, 8, 256, 3])]; + tensor var_841 = reshape(shape = var_840, x = q_1)[name = string("op_841")]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_864 = 
const()[name = string("op_864"), val = tensor([3, 8, 256])]; + tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = var_841)[name = string("transpose_238")]; + tensor x_1 = reshape(shape = var_864, x = transpose_48)[name = string("x_1")]; + int32 var_870 = const()[name = string("op_870"), val = int32(-1)]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_872 = mul(x = x_1, y = const_2_promoted)[name = string("op_872")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9 = concat(axis = var_870, interleave = input_9_interleave_0, values = (x_1, var_872))[name = string("input_9")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_867_to_fp16 = const()[name = string("op_867_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_867_to_fp16, x = input_9)[name = string("normed_9_cast_fp16")]; + tensor var_877_split_sizes_0 = const()[name = string("op_877_split_sizes_0"), val = tensor([256, 256])]; + int32 var_877_axis_0 = const()[name = string("op_877_axis_0"), val = int32(-1)]; + tensor var_877_0, tensor var_877_1 = split(axis = var_877_axis_0, split_sizes = var_877_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_877")]; + tensor q_5 = mul(x = var_877_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_5")]; + tensor var_884 = const()[name = string("op_884"), val = tensor([1, 3, 8, 256])]; + tensor var_885 = reshape(shape = var_884, x = q_5)[name = string("op_885")]; + tensor var_890 = const()[name = string("op_890"), val = tensor([0, 2, 1, 3])]; + tensor q_7 = transpose(perm = var_890, x = var_885)[name = string("transpose_237")]; + tensor var_892_cast_fp16 = mul(x = q_7, y = cos_s)[name = string("op_892_cast_fp16")]; + tensor var_893_split_sizes_0 = const()[name = string("op_893_split_sizes_0"), val = tensor([128, 
128])]; + int32 var_893_axis_0 = const()[name = string("op_893_axis_0"), val = int32(-1)]; + tensor var_893_0, tensor var_893_1 = split(axis = var_893_axis_0, split_sizes = var_893_split_sizes_0, x = q_7)[name = string("op_893")]; + fp16 const_3_promoted = const()[name = string("const_3_promoted"), val = fp16(-0x1p+0)]; + tensor var_895 = mul(x = var_893_1, y = const_3_promoted)[name = string("op_895")]; + int32 var_897 = const()[name = string("op_897"), val = int32(-1)]; + bool var_898_interleave_0 = const()[name = string("op_898_interleave_0"), val = bool(false)]; + tensor var_898 = concat(axis = var_897, interleave = var_898_interleave_0, values = (var_895, var_893_0))[name = string("op_898")]; + tensor var_899_cast_fp16 = mul(x = var_898, y = sin_s)[name = string("op_899_cast_fp16")]; + tensor q_11_cast_fp16 = add(x = var_892_cast_fp16, y = var_899_cast_fp16)[name = string("q_11_cast_fp16")]; + string k_1_pad_type_0 = const()[name = string("k_1_pad_type_0"), val = string("valid")]; + tensor k_1_strides_0 = const()[name = string("k_1_strides_0"), val = tensor([1, 1])]; + tensor k_1_pad_0 = const()[name = string("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_1_dilations_0 = const()[name = string("k_1_dilations_0"), val = tensor([1, 1])]; + int32 k_1_groups_0 = const()[name = string("k_1_groups_0"), val = int32(1)]; + tensor k_1 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = var_819_cast_fp16)[name = string("k_1")]; + tensor var_917 = const()[name = string("op_917"), val = tensor([1, 2, 256, 3])]; + tensor var_918 = reshape(shape = var_917, x = k_1)[name = string("op_918")]; + tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_1_pad_type_0 = const()[name = string("v_1_pad_type_0"), val = string("valid")]; + tensor v_1_strides_0 = const()[name = 
string("v_1_strides_0"), val = tensor([1, 1])]; + tensor v_1_pad_0 = const()[name = string("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_1_dilations_0 = const()[name = string("v_1_dilations_0"), val = tensor([1, 1])]; + int32 v_1_groups_0 = const()[name = string("v_1_groups_0"), val = int32(1)]; + tensor v_1 = conv(dilations = v_1_dilations_0, groups = v_1_groups_0, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = v_1_strides_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = var_819_cast_fp16)[name = string("v_1")]; + tensor var_945 = const()[name = string("op_945"), val = tensor([1, 2, 256, 3])]; + tensor var_946 = reshape(shape = var_945, x = v_1)[name = string("op_946")]; + tensor var_951 = const()[name = string("op_951"), val = tensor([0, 1, 3, 2])]; + tensor var_969 = const()[name = string("op_969"), val = tensor([3, 2, 256])]; + tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = var_918)[name = string("transpose_236")]; + tensor x_3 = reshape(shape = var_969, x = transpose_49)[name = string("x_3")]; + int32 var_975 = const()[name = string("op_975"), val = int32(-1)]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_977 = mul(x = x_3, y = const_4_promoted)[name = string("op_977")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11 = concat(axis = var_975, interleave = input_11_interleave_0, values = (x_3, var_977))[name = string("input_11")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_972_to_fp16 = const()[name = string("op_972_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_972_to_fp16, x = input_11)[name = string("normed_13_cast_fp16")]; + tensor var_982_split_sizes_0 = const()[name = string("op_982_split_sizes_0"), val = tensor([256, 256])]; + int32 var_982_axis_0 = 
const()[name = string("op_982_axis_0"), val = int32(-1)]; + tensor var_982_0, tensor var_982_1 = split(axis = var_982_axis_0, split_sizes = var_982_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_982")]; + tensor k_5 = mul(x = var_982_0, y = layers_0_self_attn_k_norm_weight)[name = string("k_5")]; + tensor var_989 = const()[name = string("op_989"), val = tensor([1, 3, 2, 256])]; + tensor var_990 = reshape(shape = var_989, x = k_5)[name = string("op_990")]; + tensor var_995 = const()[name = string("op_995"), val = tensor([0, 2, 1, 3])]; + fp16 var_997_promoted = const()[name = string("op_997_promoted"), val = fp16(0x1p+1)]; + tensor var_952 = transpose(perm = var_951, x = var_946)[name = string("transpose_235")]; + tensor var_998 = pow(x = var_952, y = var_997_promoted)[name = string("op_998")]; + tensor var_1003_axes_0 = const()[name = string("op_1003_axes_0"), val = tensor([-1])]; + bool var_1003_keep_dims_0 = const()[name = string("op_1003_keep_dims_0"), val = bool(true)]; + tensor var_1003 = reduce_mean(axes = var_1003_axes_0, keep_dims = var_1003_keep_dims_0, x = var_998)[name = string("op_1003")]; + fp16 var_1005_to_fp16 = const()[name = string("op_1005_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_1_cast_fp16 = add(x = var_1003, y = var_1005_to_fp16)[name = string("mean_sq_1_cast_fp16")]; + fp32 var_1007_epsilon_0 = const()[name = string("op_1007_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1007_cast_fp16 = rsqrt(epsilon = var_1007_epsilon_0, x = mean_sq_1_cast_fp16)[name = string("op_1007_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_952, y = var_1007_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor q_9 = transpose(perm = var_995, x = var_990)[name = string("transpose_234")]; + tensor var_1009_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_1009_cast_fp16")]; + tensor var_1010_split_sizes_0 = const()[name = string("op_1010_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1010_axis_0 = const()[name = 
string("op_1010_axis_0"), val = int32(-1)]; + tensor var_1010_0, tensor var_1010_1 = split(axis = var_1010_axis_0, split_sizes = var_1010_split_sizes_0, x = q_9)[name = string("op_1010")]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor var_1012 = mul(x = var_1010_1, y = const_5_promoted)[name = string("op_1012")]; + int32 var_1014 = const()[name = string("op_1014"), val = int32(-1)]; + bool var_1015_interleave_0 = const()[name = string("op_1015_interleave_0"), val = bool(false)]; + tensor var_1015 = concat(axis = var_1014, interleave = var_1015_interleave_0, values = (var_1012, var_1010_0))[name = string("op_1015")]; + tensor var_1016_cast_fp16 = mul(x = var_1015, y = sin_s)[name = string("op_1016_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = var_1009_cast_fp16, y = var_1016_cast_fp16)[name = string("input_13_cast_fp16")]; + tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; + fp16 const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_1_cast_fp16 = pad(constant_val = const_6_to_fp16, mode = k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_13_cast_fp16)[name = string("k_padded_1_cast_fp16")]; + tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; + fp16 const_7_to_fp16 = const()[name = string("const_7_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_1_cast_fp16 = pad(constant_val = const_7_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = input_15_cast_fp16)[name = string("v_padded_1_cast_fp16")]; + tensor slot_k_1_begin_0 = const()[name = string("slot_k_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_k_1_end_0 = 
const()[name = string("slot_k_1_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_k_1_end_mask_0 = const()[name = string("slot_k_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_1_cast_fp16 = slice_by_index(begin = slot_k_1_begin_0, end = slot_k_1_end_0, end_mask = slot_k_1_end_mask_0, x = K_sliding_in)[name = string("slot_k_1_cast_fp16")]; + tensor slot_v_1_begin_0 = const()[name = string("slot_v_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_v_1_end_0 = const()[name = string("slot_v_1_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_v_1_end_mask_0 = const()[name = string("slot_v_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_1_cast_fp16 = slice_by_index(begin = slot_v_1_begin_0, end = slot_v_1_end_0, end_mask = slot_v_1_end_mask_0, x = V_sliding_in)[name = string("slot_v_1_cast_fp16")]; + tensor var_1055_begin_0 = const()[name = string("op_1055_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1055_end_0 = const()[name = string("op_1055_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1055_end_mask_0 = const()[name = string("op_1055_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1055_cast_fp16 = slice_by_index(begin = var_1055_begin_0, end = var_1055_end_0, end_mask = var_1055_end_mask_0, x = slot_k_1_cast_fp16)[name = string("op_1055_cast_fp16")]; + int32 var_1062 = const()[name = string("op_1062"), val = int32(2)]; + bool new_k_1_interleave_0 = const()[name = string("new_k_1_interleave_0"), val = bool(false)]; + tensor new_k_1_cast_fp16 = concat(axis = var_1062, interleave = new_k_1_interleave_0, values = (var_1055_cast_fp16, k_padded_1_cast_fp16))[name = string("new_k_1_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = string("op_1078_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1078_end_0 = const()[name = string("op_1078_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1078_end_mask_0 = const()[name = string("op_1078_end_mask_0"), val = 
tensor([true, true, true, true])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = slot_v_1_cast_fp16)[name = string("op_1078_cast_fp16")]; + int32 var_1085 = const()[name = string("op_1085"), val = int32(2)]; + bool new_v_1_interleave_0 = const()[name = string("new_v_1_interleave_0"), val = bool(false)]; + tensor new_v_1_cast_fp16 = concat(axis = var_1085, interleave = new_v_1_interleave_0, values = (var_1078_cast_fp16, v_padded_1_cast_fp16))[name = string("new_v_1_cast_fp16")]; + tensor var_1096_begin_0 = const()[name = string("op_1096_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1096_end_0 = const()[name = string("op_1096_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1096_end_mask_0 = const()[name = string("op_1096_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1096_cast_fp16 = slice_by_index(begin = var_1096_begin_0, end = var_1096_end_0, end_mask = var_1096_end_mask_0, x = K_sliding_in)[name = string("op_1096_cast_fp16")]; + int32 var_1098 = const()[name = string("op_1098"), val = int32(0)]; + bool K_sliding_out_1_interleave_0 = const()[name = string("K_sliding_out_1_interleave_0"), val = bool(false)]; + tensor K_sliding_out_1_cast_fp16 = concat(axis = var_1098, interleave = K_sliding_out_1_interleave_0, values = (new_k_1_cast_fp16, var_1096_cast_fp16))[name = string("K_sliding_out_1_cast_fp16")]; + tensor var_1109_begin_0 = const()[name = string("op_1109_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1109_end_0 = const()[name = string("op_1109_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1109_end_mask_0 = const()[name = string("op_1109_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1109_cast_fp16 = slice_by_index(begin = var_1109_begin_0, end = var_1109_end_0, end_mask = var_1109_end_mask_0, x = V_sliding_in)[name = string("op_1109_cast_fp16")]; + int32 var_1111 = const()[name = string("op_1111"), val = 
int32(0)]; + bool V_sliding_out_1_interleave_0 = const()[name = string("V_sliding_out_1_interleave_0"), val = bool(false)]; + tensor V_sliding_out_1_cast_fp16 = concat(axis = var_1111, interleave = V_sliding_out_1_interleave_0, values = (new_v_1_cast_fp16, var_1109_cast_fp16))[name = string("V_sliding_out_1_cast_fp16")]; + tensor var_1117_begin_0 = const()[name = string("op_1117_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1117_end_0 = const()[name = string("op_1117_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1117_end_mask_0 = const()[name = string("op_1117_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1117_cast_fp16 = slice_by_index(begin = var_1117_begin_0, end = var_1117_end_0, end_mask = var_1117_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor K_for_attn_1_begin_0 = const()[name = string("K_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_1_end_0 = const()[name = string("K_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_1_end_mask_0 = const()[name = string("K_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_1_cast_fp16 = slice_by_index(begin = K_for_attn_1_begin_0, end = K_for_attn_1_end_0, end_mask = K_for_attn_1_end_mask_0, x = var_1117_cast_fp16)[name = string("K_for_attn_1_cast_fp16")]; + tensor var_1127_begin_0 = const()[name = string("op_1127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1127_end_0 = const()[name = string("op_1127_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1127_end_mask_0 = const()[name = string("op_1127_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1127_cast_fp16 = slice_by_index(begin = var_1127_begin_0, end = var_1127_end_0, end_mask = var_1127_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1127_cast_fp16")]; + tensor V_for_attn_1_begin_0 = const()[name = string("V_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor V_for_attn_1_end_0 = const()[name = string("V_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_1_end_mask_0 = const()[name = string("V_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_1_cast_fp16 = slice_by_index(begin = V_for_attn_1_begin_0, end = V_for_attn_1_end_0, end_mask = V_for_attn_1_end_mask_0, x = var_1127_cast_fp16)[name = string("V_for_attn_1_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_for_attn_1_cast_fp16)[name = string("transpose_233")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_232")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_for_attn_1_cast_fp16)[name = string("transpose_231")]; + tensor 
tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_230")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_50_cast_fp16 = transpose(perm = transpose_50_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_229")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_11_cast_fp16, y = transpose_50_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_7_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_7_cast_fp16)[name = string("reduce_max_0")]; + tensor var_1162 = sub(x = x_7_cast_fp16, y = reduce_max_0)[name = 
string("op_1162")]; + tensor var_1168 = exp(x = var_1162)[name = string("op_1168")]; + tensor var_1178_axes_0 = const()[name = string("op_1178_axes_0"), val = tensor([-1])]; + bool var_1178_keep_dims_0 = const()[name = string("op_1178_keep_dims_0"), val = bool(true)]; + tensor var_1178 = reduce_sum(axes = var_1178_axes_0, keep_dims = var_1178_keep_dims_0, x = var_1168)[name = string("op_1178")]; + tensor var_1184_cast_fp16 = real_div(x = var_1168, y = var_1178)[name = string("op_1184_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_228")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_1184_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_1195 = const()[name = string("op_1195"), val = tensor([0, 2, 1, 3])]; + tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 3, -1])]; + tensor var_1196_cast_fp16 = transpose(perm = var_1195, x = attn_output_1_cast_fp16)[name = string("transpose_227")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_1202, x = var_1196_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_1207 = const()[name = string("op_1207"), val = tensor([0, 2, 1])]; + string var_1223_pad_type_0 = const()[name = string("op_1223_pad_type_0"), val = string("valid")]; + int32 var_1223_groups_0 = const()[name = string("op_1223_groups_0"), val = int32(1)]; + tensor var_1223_strides_0 = const()[name = string("op_1223_strides_0"), val = tensor([1])]; + tensor var_1223_pad_0 = const()[name = string("op_1223_pad_0"), val = tensor([0, 0])]; + tensor var_1223_dilations_0 = const()[name = 
string("op_1223_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545029952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547651456))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1208_cast_fp16 = transpose(perm = var_1207, x = attn_output_3_cast_fp16)[name = string("transpose_226")]; + tensor var_1223_cast_fp16 = conv(dilations = var_1223_dilations_0, groups = var_1223_groups_0, pad = var_1223_pad_0, pad_type = var_1223_pad_type_0, strides = var_1223_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1208_cast_fp16)[name = string("op_1223_cast_fp16")]; + tensor var_1227 = const()[name = string("op_1227"), val = tensor([0, 2, 1])]; + int32 var_1233 = const()[name = string("op_1233"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_11_cast_fp16 = transpose(perm = var_1227, x = var_1223_cast_fp16)[name = string("transpose_225")]; + tensor var_1235_cast_fp16 = mul(x = x_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_1235_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_1233, interleave = input_19_interleave_0, values = (x_11_cast_fp16, var_1235_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_1230_to_fp16 = const()[name = string("op_1230_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1230_to_fp16, x = input_19_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor var_1240_split_sizes_0 = const()[name = 
string("op_1240_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1240_axis_0 = const()[name = string("op_1240_axis_0"), val = int32(-1)]; + tensor var_1240_cast_fp16_0, tensor var_1240_cast_fp16_1 = split(axis = var_1240_axis_0, split_sizes = var_1240_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1240_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547654080)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_1240_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_13_cast_fp16")]; + int32 var_1249 = const()[name = string("op_1249"), val = int32(-1)]; + fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1251_cast_fp16 = mul(x = x_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_1251_cast_fp16")]; + bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; + tensor input_21_cast_fp16 = concat(axis = var_1249, interleave = input_21_interleave_0, values = (x_13_cast_fp16, var_1251_cast_fp16))[name = string("input_21_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_1246_to_fp16 = const()[name = string("op_1246_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_1246_to_fp16, x = input_21_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_1256_split_sizes_0 = const()[name = string("op_1256_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1256_axis_0 = const()[name = string("op_1256_axis_0"), val = int32(-1)]; + tensor 
var_1256_cast_fp16_0, tensor var_1256_cast_fp16_1 = split(axis = var_1256_axis_0, split_sizes = var_1256_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_1256_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547659264)))]; + tensor h_3_cast_fp16 = mul(x = var_1256_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_1267 = const()[name = string("op_1267"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_1268 = transpose(perm = var_1267, x = h_3_cast_fp16)[name = string("transpose_224")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_1268)[name = string("input_23")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_23)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = 
string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_25 = mul(x = gate_3, y = up_1)[name = string("input_25")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_25)[name = string("mlp_out_1")]; + tensor var_1308_axes_0 = const()[name = string("op_1308_axes_0"), val = tensor([2])]; + tensor var_1308 = squeeze(axes = var_1308_axes_0, x = mlp_out_1)[name = string("op_1308")]; + tensor var_1312 = const()[name = string("op_1312"), val = tensor([0, 2, 1])]; + int32 var_1318 = const()[name = string("op_1318"), val = int32(-1)]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor x_15 = transpose(perm = var_1312, x = var_1308)[name = string("transpose_223")]; + tensor var_1320 = mul(x = x_15, y = const_10_promoted)[name = string("op_1320")]; + bool 
input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; + tensor input_27 = concat(axis = var_1318, interleave = input_27_interleave_0, values = (x_15, var_1320))[name = string("input_27")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_1315_to_fp16 = const()[name = string("op_1315_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1315_to_fp16, x = input_27)[name = string("normed_25_cast_fp16")]; + tensor var_1325_split_sizes_0 = const()[name = string("op_1325_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1325_axis_0 = const()[name = string("op_1325_axis_0"), val = int32(-1)]; + tensor var_1325_0, tensor var_1325_1 = split(axis = var_1325_axis_0, split_sizes = var_1325_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1325")]; + tensor hidden_states_3 = mul(x = var_1325_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_13_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 0])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 3, 256])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_1353 = const()[name = string("op_1353"), val = tensor([0, 2, 1])]; + tensor input_29_axes_0 = const()[name = string("input_29_axes_0"), val = tensor([2])]; + tensor var_1354 = transpose(perm = var_1353, x = 
hidden_states_5_cast_fp16)[name = string("transpose_222")]; + tensor input_29 = expand_dims(axes = input_29_axes_0, x = var_1354)[name = string("input_29")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_29)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_1373 = const()[name = string("op_1373"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_1374_cast_fp16 = transpose(perm = var_1373, x = per_layer_slice_1_cast_fp16)[name = string("transpose_221")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_1374_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_31_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547664448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547992192))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_31_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_1390_axes_0 = const()[name = string("op_1390_axes_0"), val = tensor([2])]; + tensor var_1390_cast_fp16 = squeeze(axes = var_1390_axes_0, x = gated_5_cast_fp16)[name = string("op_1390_cast_fp16")]; + tensor var_1394 = const()[name = string("op_1394"), val = tensor([0, 2, 1])]; + int32 var_1400 = const()[name = string("op_1400"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_17_cast_fp16 = transpose(perm = var_1394, x = var_1390_cast_fp16)[name = string("transpose_220")]; + tensor var_1402_cast_fp16 = mul(x = x_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1402_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_1400, interleave = input_33_interleave_0, values = (x_17_cast_fp16, var_1402_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_1397_to_fp16 = const()[name = 
string("op_1397_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1397_to_fp16, x = input_33_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor var_1407_split_sizes_0 = const()[name = string("op_1407_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1407_axis_0 = const()[name = string("op_1407_axis_0"), val = int32(-1)]; + tensor var_1407_cast_fp16_0, tensor var_1407_cast_fp16_1 = split(axis = var_1407_axis_0, split_sizes = var_1407_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1407_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547994816)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_1407_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = tensor([0x1.f4p-5])]; + tensor x_19_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_12_promoted_to_fp16)[name = string("x_19_cast_fp16")]; + int32 var_1422 = const()[name = string("op_1422"), val = int32(-1)]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1424_cast_fp16 = mul(x = x_19_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1424_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1422, interleave = input_35_interleave_0, values = (x_19_cast_fp16, var_1424_cast_fp16))[name = string("input_35_cast_fp16")]; 
+ tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1419_to_fp16 = const()[name = string("op_1419_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1419_to_fp16, x = input_35_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1429_split_sizes_0 = const()[name = string("op_1429_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1429_axis_0 = const()[name = string("op_1429_axis_0"), val = int32(-1)]; + tensor var_1429_cast_fp16_0, tensor var_1429_cast_fp16_1 = split(axis = var_1429_axis_0, split_sizes = var_1429_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1429_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548000000)))]; + tensor h_7_cast_fp16 = mul(x = var_1429_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_1435 = const()[name = string("op_1435"), val = tensor([0, 2, 1])]; + tensor var_1438_axes_0 = const()[name = string("op_1438_axes_0"), val = tensor([2])]; + tensor var_1436_cast_fp16 = transpose(perm = var_1435, x = h_7_cast_fp16)[name = string("transpose_219")]; + tensor var_1438_cast_fp16 = expand_dims(axes = var_1438_axes_0, x = var_1436_cast_fp16)[name = string("op_1438_cast_fp16")]; + string q_13_pad_type_0 = const()[name = string("q_13_pad_type_0"), val = string("valid")]; + tensor q_13_strides_0 = const()[name = string("q_13_strides_0"), val = tensor([1, 1])]; + tensor q_13_pad_0 = const()[name = string("q_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_13_dilations_0 = const()[name = string("q_13_dilations_0"), val = tensor([1, 1])]; + int32 q_13_groups_0 = const()[name = string("q_13_groups_0"), val = int32(1)]; + tensor q_13 = conv(dilations = 
q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_1438_cast_fp16)[name = string("q_13")]; + tensor var_1459 = const()[name = string("op_1459"), val = tensor([1, 8, 256, 3])]; + tensor var_1460 = reshape(shape = var_1459, x = q_13)[name = string("op_1460")]; + tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_1483 = const()[name = string("op_1483"), val = tensor([3, 8, 256])]; + tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = var_1460)[name = string("transpose_218")]; + tensor x_21 = reshape(shape = var_1483, x = transpose_51)[name = string("x_21")]; + int32 var_1489 = const()[name = string("op_1489"), val = int32(-1)]; + fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; + tensor var_1491 = mul(x = x_21, y = const_14_promoted)[name = string("op_1491")]; + bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; + tensor input_39 = concat(axis = var_1489, interleave = input_39_interleave_0, values = (x_21, var_1491))[name = string("input_39")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1486_to_fp16, x = input_39)[name = string("normed_37_cast_fp16")]; + tensor var_1496_split_sizes_0 = const()[name = string("op_1496_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1496_axis_0 = const()[name = string("op_1496_axis_0"), val = int32(-1)]; + tensor var_1496_0, tensor var_1496_1 = split(axis = var_1496_axis_0, split_sizes = var_1496_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1496")]; + tensor q_17 = mul(x = var_1496_0, y = 
layers_1_self_attn_q_norm_weight)[name = string("q_17")]; + tensor var_1503 = const()[name = string("op_1503"), val = tensor([1, 3, 8, 256])]; + tensor var_1504 = reshape(shape = var_1503, x = q_17)[name = string("op_1504")]; + tensor var_1509 = const()[name = string("op_1509"), val = tensor([0, 2, 1, 3])]; + tensor q_19 = transpose(perm = var_1509, x = var_1504)[name = string("transpose_217")]; + tensor var_1511_cast_fp16 = mul(x = q_19, y = cos_s)[name = string("op_1511_cast_fp16")]; + tensor var_1512_split_sizes_0 = const()[name = string("op_1512_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1512_axis_0 = const()[name = string("op_1512_axis_0"), val = int32(-1)]; + tensor var_1512_0, tensor var_1512_1 = split(axis = var_1512_axis_0, split_sizes = var_1512_split_sizes_0, x = q_19)[name = string("op_1512")]; + fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; + tensor var_1514 = mul(x = var_1512_1, y = const_15_promoted)[name = string("op_1514")]; + int32 var_1516 = const()[name = string("op_1516"), val = int32(-1)]; + bool var_1517_interleave_0 = const()[name = string("op_1517_interleave_0"), val = bool(false)]; + tensor var_1517 = concat(axis = var_1516, interleave = var_1517_interleave_0, values = (var_1514, var_1512_0))[name = string("op_1517")]; + tensor var_1518_cast_fp16 = mul(x = var_1517, y = sin_s)[name = string("op_1518_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_1511_cast_fp16, y = var_1518_cast_fp16)[name = string("q_23_cast_fp16")]; + string k_7_pad_type_0 = const()[name = string("k_7_pad_type_0"), val = string("valid")]; + tensor k_7_strides_0 = const()[name = string("k_7_strides_0"), val = tensor([1, 1])]; + tensor k_7_pad_0 = const()[name = string("k_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_7_dilations_0 = const()[name = string("k_7_dilations_0"), val = tensor([1, 1])]; + int32 k_7_groups_0 = const()[name = string("k_7_groups_0"), val = int32(1)]; + tensor k_7 = conv(dilations = 
k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = var_1438_cast_fp16)[name = string("k_7")]; + tensor var_1536 = const()[name = string("op_1536"), val = tensor([1, 2, 256, 3])]; + tensor var_1537 = reshape(shape = var_1536, x = k_7)[name = string("op_1537")]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_3_pad_type_0 = const()[name = string("v_3_pad_type_0"), val = string("valid")]; + tensor v_3_strides_0 = const()[name = string("v_3_strides_0"), val = tensor([1, 1])]; + tensor v_3_pad_0 = const()[name = string("v_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_3_dilations_0 = const()[name = string("v_3_dilations_0"), val = tensor([1, 1])]; + int32 v_3_groups_0 = const()[name = string("v_3_groups_0"), val = int32(1)]; + tensor v_3 = conv(dilations = v_3_dilations_0, groups = v_3_groups_0, pad = v_3_pad_0, pad_type = v_3_pad_type_0, strides = v_3_strides_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = var_1438_cast_fp16)[name = string("v_3")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 2, 256, 3])]; + tensor var_1565 = reshape(shape = var_1564, x = v_3)[name = string("op_1565")]; + tensor var_1570 = const()[name = string("op_1570"), val = tensor([0, 1, 3, 2])]; + tensor var_1588 = const()[name = string("op_1588"), val = tensor([3, 2, 256])]; + tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = var_1537)[name = string("transpose_216")]; + tensor x_23 = reshape(shape = var_1588, x = transpose_52)[name = string("x_23")]; + int32 var_1594 = const()[name = string("op_1594"), val = int32(-1)]; + fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; + tensor var_1596 = mul(x = x_23, y = const_16_promoted)[name = string("op_1596")]; + bool input_41_interleave_0 = const()[name = 
string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1594, interleave = input_41_interleave_0, values = (x_23, var_1596))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1591_to_fp16 = const()[name = string("op_1591_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1591_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1601_split_sizes_0 = const()[name = string("op_1601_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1601_axis_0 = const()[name = string("op_1601_axis_0"), val = int32(-1)]; + tensor var_1601_0, tensor var_1601_1 = split(axis = var_1601_axis_0, split_sizes = var_1601_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1601")]; + tensor k_11 = mul(x = var_1601_0, y = layers_1_self_attn_k_norm_weight)[name = string("k_11")]; + tensor var_1608 = const()[name = string("op_1608"), val = tensor([1, 3, 2, 256])]; + tensor var_1609 = reshape(shape = var_1608, x = k_11)[name = string("op_1609")]; + tensor var_1614 = const()[name = string("op_1614"), val = tensor([0, 2, 1, 3])]; + fp16 var_1616_promoted = const()[name = string("op_1616_promoted"), val = fp16(0x1p+1)]; + tensor var_1571 = transpose(perm = var_1570, x = var_1565)[name = string("transpose_215")]; + tensor var_1617 = pow(x = var_1571, y = var_1616_promoted)[name = string("op_1617")]; + tensor var_1622_axes_0 = const()[name = string("op_1622_axes_0"), val = tensor([-1])]; + bool var_1622_keep_dims_0 = const()[name = string("op_1622_keep_dims_0"), val = bool(true)]; + tensor var_1622 = reduce_mean(axes = var_1622_axes_0, keep_dims = var_1622_keep_dims_0, x = var_1617)[name = string("op_1622")]; + fp16 var_1624_to_fp16 = const()[name = string("op_1624_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_3_cast_fp16 = add(x = var_1622, y = var_1624_to_fp16)[name = 
string("mean_sq_3_cast_fp16")]; + fp32 var_1626_epsilon_0 = const()[name = string("op_1626_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1626_cast_fp16 = rsqrt(epsilon = var_1626_epsilon_0, x = mean_sq_3_cast_fp16)[name = string("op_1626_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = var_1571, y = var_1626_cast_fp16)[name = string("input_45_cast_fp16")]; + tensor q_21 = transpose(perm = var_1614, x = var_1609)[name = string("transpose_214")]; + tensor var_1628_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1628_cast_fp16")]; + tensor var_1629_split_sizes_0 = const()[name = string("op_1629_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1629_axis_0 = const()[name = string("op_1629_axis_0"), val = int32(-1)]; + tensor var_1629_0, tensor var_1629_1 = split(axis = var_1629_axis_0, split_sizes = var_1629_split_sizes_0, x = q_21)[name = string("op_1629")]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1631 = mul(x = var_1629_1, y = const_17_promoted)[name = string("op_1631")]; + int32 var_1633 = const()[name = string("op_1633"), val = int32(-1)]; + bool var_1634_interleave_0 = const()[name = string("op_1634_interleave_0"), val = bool(false)]; + tensor var_1634 = concat(axis = var_1633, interleave = var_1634_interleave_0, values = (var_1631, var_1629_0))[name = string("op_1634")]; + tensor var_1635_cast_fp16 = mul(x = var_1634, y = sin_s)[name = string("op_1635_cast_fp16")]; + tensor input_43_cast_fp16 = add(x = var_1628_cast_fp16, y = var_1635_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor k_padded_3_pad_0 = const()[name = string("k_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; + fp16 const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_3_cast_fp16 = pad(constant_val = const_18_to_fp16, mode = k_padded_3_mode_0, 
pad = k_padded_3_pad_0, x = input_43_cast_fp16)[name = string("k_padded_3_cast_fp16")]; + tensor v_padded_3_pad_0 = const()[name = string("v_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; + fp16 const_19_to_fp16 = const()[name = string("const_19_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_3_cast_fp16 = pad(constant_val = const_19_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = input_45_cast_fp16)[name = string("v_padded_3_cast_fp16")]; + tensor slot_k_3_begin_0 = const()[name = string("slot_k_3_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_k_3_end_0 = const()[name = string("slot_k_3_end_0"), val = tensor([2, 2, 512, 512])]; + tensor slot_k_3_end_mask_0 = const()[name = string("slot_k_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_3_cast_fp16 = slice_by_index(begin = slot_k_3_begin_0, end = slot_k_3_end_0, end_mask = slot_k_3_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("slot_k_3_cast_fp16")]; + tensor slot_v_3_begin_0 = const()[name = string("slot_v_3_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_v_3_end_0 = const()[name = string("slot_v_3_end_0"), val = tensor([2, 2, 512, 512])]; + tensor slot_v_3_end_mask_0 = const()[name = string("slot_v_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_3_cast_fp16 = slice_by_index(begin = slot_v_3_begin_0, end = slot_v_3_end_0, end_mask = slot_v_3_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("slot_v_3_cast_fp16")]; + tensor var_1674_begin_0 = const()[name = string("op_1674_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1674_end_0 = const()[name = string("op_1674_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1674_end_mask_0 = const()[name = string("op_1674_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1674_cast_fp16 = slice_by_index(begin = var_1674_begin_0, end = 
var_1674_end_0, end_mask = var_1674_end_mask_0, x = slot_k_3_cast_fp16)[name = string("op_1674_cast_fp16")]; + int32 var_1681 = const()[name = string("op_1681"), val = int32(2)]; + bool new_k_3_interleave_0 = const()[name = string("new_k_3_interleave_0"), val = bool(false)]; + tensor new_k_3_cast_fp16 = concat(axis = var_1681, interleave = new_k_3_interleave_0, values = (var_1674_cast_fp16, k_padded_3_cast_fp16))[name = string("new_k_3_cast_fp16")]; + tensor var_1697_begin_0 = const()[name = string("op_1697_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1697_end_0 = const()[name = string("op_1697_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1697_end_mask_0 = const()[name = string("op_1697_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1697_cast_fp16 = slice_by_index(begin = var_1697_begin_0, end = var_1697_end_0, end_mask = var_1697_end_mask_0, x = slot_v_3_cast_fp16)[name = string("op_1697_cast_fp16")]; + int32 var_1704 = const()[name = string("op_1704"), val = int32(2)]; + bool new_v_3_interleave_0 = const()[name = string("new_v_3_interleave_0"), val = bool(false)]; + tensor new_v_3_cast_fp16 = concat(axis = var_1704, interleave = new_v_3_interleave_0, values = (var_1697_cast_fp16, v_padded_3_cast_fp16))[name = string("new_v_3_cast_fp16")]; + tensor var_1715_begin_0 = const()[name = string("op_1715_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1715_end_0 = const()[name = string("op_1715_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1715_end_mask_0 = const()[name = string("op_1715_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1715_cast_fp16 = slice_by_index(begin = var_1715_begin_0, end = var_1715_end_0, end_mask = var_1715_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1715_cast_fp16")]; + int32 var_1717 = const()[name = string("op_1717"), val = int32(0)]; + bool K_sliding_out_3_interleave_0 = const()[name = string("K_sliding_out_3_interleave_0"), val = bool(false)]; + 
tensor K_sliding_out_3_cast_fp16 = concat(axis = var_1717, interleave = K_sliding_out_3_interleave_0, values = (var_1117_cast_fp16, new_k_3_cast_fp16, var_1715_cast_fp16))[name = string("K_sliding_out_3_cast_fp16")]; + tensor var_1728_begin_0 = const()[name = string("op_1728_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1728_end_0 = const()[name = string("op_1728_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1728_end_mask_0 = const()[name = string("op_1728_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1728_cast_fp16 = slice_by_index(begin = var_1728_begin_0, end = var_1728_end_0, end_mask = var_1728_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1728_cast_fp16")]; + int32 var_1730 = const()[name = string("op_1730"), val = int32(0)]; + bool V_sliding_out_3_interleave_0 = const()[name = string("V_sliding_out_3_interleave_0"), val = bool(false)]; + tensor V_sliding_out_3_cast_fp16 = concat(axis = var_1730, interleave = V_sliding_out_3_interleave_0, values = (var_1127_cast_fp16, new_v_3_cast_fp16, var_1728_cast_fp16))[name = string("V_sliding_out_3_cast_fp16")]; + tensor var_1736_begin_0 = const()[name = string("op_1736_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1736_end_0 = const()[name = string("op_1736_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1736_end_mask_0 = const()[name = string("op_1736_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1736_cast_fp16 = slice_by_index(begin = var_1736_begin_0, end = var_1736_end_0, end_mask = var_1736_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("op_1736_cast_fp16")]; + tensor K_for_attn_3_begin_0 = const()[name = string("K_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_3_end_0 = const()[name = string("K_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_3_end_mask_0 = const()[name = string("K_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
K_for_attn_3_cast_fp16 = slice_by_index(begin = K_for_attn_3_begin_0, end = K_for_attn_3_end_0, end_mask = K_for_attn_3_end_mask_0, x = var_1736_cast_fp16)[name = string("K_for_attn_3_cast_fp16")]; + tensor var_1746_begin_0 = const()[name = string("op_1746_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1746_end_0 = const()[name = string("op_1746_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1746_end_mask_0 = const()[name = string("op_1746_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1746_cast_fp16 = slice_by_index(begin = var_1746_begin_0, end = var_1746_end_0, end_mask = var_1746_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("op_1746_cast_fp16")]; + tensor V_for_attn_3_begin_0 = const()[name = string("V_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_3_end_0 = const()[name = string("V_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_3_end_mask_0 = const()[name = string("V_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_3_cast_fp16 = slice_by_index(begin = V_for_attn_3_begin_0, end = V_for_attn_3_end_0, end_mask = V_for_attn_3_end_mask_0, x = var_1746_cast_fp16)[name = string("V_for_attn_3_cast_fp16")]; + tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_for_attn_3_cast_fp16)[name = string("transpose_213")]; + tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_4, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; + tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = 
tensor([1, 0, 2, 3, 4])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_212")]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_5, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_for_attn_3_cast_fp16)[name = string("transpose_211")]; + tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_6, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_210")]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_7, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; + tensor V_expanded_3_perm_0 = const()[name = string("V_expanded_3_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = 
reshape_5_cast_fp16)[name = string("transpose_209")]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_23_cast_fp16, y = transpose_53_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_27_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_27_cast_fp16)[name = string("reduce_max_1")]; + tensor var_1781 = sub(x = x_27_cast_fp16, y = reduce_max_1)[name = string("op_1781")]; + tensor var_1787 = exp(x = var_1781)[name = string("op_1787")]; + tensor var_1797_axes_0 = const()[name = string("op_1797_axes_0"), val = tensor([-1])]; + bool var_1797_keep_dims_0 = const()[name = string("op_1797_keep_dims_0"), val = bool(true)]; + tensor var_1797 = reduce_sum(axes = var_1797_axes_0, keep_dims = var_1797_keep_dims_0, x = var_1787)[name = string("op_1797")]; + tensor var_1803_cast_fp16 = real_div(x = var_1787, y = var_1797)[name = string("op_1803_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor V_expanded_3_cast_fp16 = transpose(perm = V_expanded_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_208")]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_1803_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_1814 = const()[name = string("op_1814"), val = tensor([0, 2, 1, 3])]; + tensor var_1821 
= const()[name = string("op_1821"), val = tensor([1, 3, -1])]; + tensor var_1815_cast_fp16 = transpose(perm = var_1814, x = attn_output_7_cast_fp16)[name = string("transpose_207")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_1821, x = var_1815_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_1826 = const()[name = string("op_1826"), val = tensor([0, 2, 1])]; + string var_1842_pad_type_0 = const()[name = string("op_1842_pad_type_0"), val = string("valid")]; + int32 var_1842_groups_0 = const()[name = string("op_1842_groups_0"), val = int32(1)]; + tensor var_1842_strides_0 = const()[name = string("op_1842_strides_0"), val = tensor([1])]; + tensor var_1842_pad_0 = const()[name = string("op_1842_pad_0"), val = tensor([0, 0])]; + tensor var_1842_dilations_0 = const()[name = string("op_1842_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548005184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550626688))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1827_cast_fp16 = transpose(perm = var_1826, x = attn_output_9_cast_fp16)[name = string("transpose_206")]; + tensor var_1842_cast_fp16 = conv(dilations = var_1842_dilations_0, groups = var_1842_groups_0, pad = var_1842_pad_0, pad_type = var_1842_pad_type_0, strides = var_1842_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1827_cast_fp16)[name = string("op_1842_cast_fp16")]; + tensor var_1846 = const()[name = string("op_1846"), val = tensor([0, 2, 1])]; + int32 var_1852 = const()[name = string("op_1852"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_31_cast_fp16 = transpose(perm = var_1846, x = var_1842_cast_fp16)[name = string("transpose_205")]; 
+ tensor var_1854_cast_fp16 = mul(x = x_31_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1854_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1852, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1854_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1849_to_fp16 = const()[name = string("op_1849_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1849_to_fp16, x = input_49_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1859_split_sizes_0 = const()[name = string("op_1859_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1859_axis_0 = const()[name = string("op_1859_axis_0"), val = int32(-1)]; + tensor var_1859_cast_fp16_0, tensor var_1859_cast_fp16_1 = split(axis = var_1859_axis_0, split_sizes = var_1859_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1859_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550629312)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1859_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_19_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_33_cast_fp16")]; + int32 var_1868 = const()[name = string("op_1868"), val = int32(-1)]; + fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1870_cast_fp16 = mul(x = x_33_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1870_cast_fp16")]; + bool input_51_interleave_0 = 
const()[name = string("input_51_interleave_0"), val = bool(false)]; + tensor input_51_cast_fp16 = concat(axis = var_1868, interleave = input_51_interleave_0, values = (x_33_cast_fp16, var_1870_cast_fp16))[name = string("input_51_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1865_to_fp16 = const()[name = string("op_1865_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1865_to_fp16, x = input_51_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1875_split_sizes_0 = const()[name = string("op_1875_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1875_axis_0 = const()[name = string("op_1875_axis_0"), val = int32(-1)]; + tensor var_1875_cast_fp16_0, tensor var_1875_cast_fp16_1 = split(axis = var_1875_axis_0, split_sizes = var_1875_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1875_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550634496)))]; + tensor h_9_cast_fp16 = mul(x = var_1875_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1886 = const()[name = string("op_1886"), val = tensor([0, 2, 1])]; + tensor input_53_axes_0 = const()[name = string("input_53_axes_0"), val = tensor([2])]; + tensor var_1887 = transpose(perm = var_1886, x = h_9_cast_fp16)[name = string("transpose_204")]; + tensor input_53 = expand_dims(axes = input_53_axes_0, x = var_1887)[name = string("input_53")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = 
string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_53)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_53)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_55 = mul(x = gate_7, y = up_3)[name = string("input_55")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = 
mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_55)[name = string("mlp_out_3")]; + tensor var_1927_axes_0 = const()[name = string("op_1927_axes_0"), val = tensor([2])]; + tensor var_1927 = squeeze(axes = var_1927_axes_0, x = mlp_out_3)[name = string("op_1927")]; + tensor var_1931 = const()[name = string("op_1931"), val = tensor([0, 2, 1])]; + int32 var_1937 = const()[name = string("op_1937"), val = int32(-1)]; + fp16 const_22_promoted = const()[name = string("const_22_promoted"), val = fp16(-0x1p+0)]; + tensor x_35 = transpose(perm = var_1931, x = var_1927)[name = string("transpose_203")]; + tensor var_1939 = mul(x = x_35, y = const_22_promoted)[name = string("op_1939")]; + bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57 = concat(axis = var_1937, interleave = input_57_interleave_0, values = (x_35, var_1939))[name = string("input_57")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1934_to_fp16 = const()[name = string("op_1934_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1934_to_fp16, x = input_57)[name = string("normed_53_cast_fp16")]; + tensor var_1944_split_sizes_0 = const()[name = string("op_1944_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1944_axis_0 = const()[name = string("op_1944_axis_0"), val = int32(-1)]; + tensor var_1944_0, tensor var_1944_1 = split(axis = var_1944_axis_0, split_sizes = var_1944_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1944")]; + tensor hidden_states_13 = mul(x = var_1944_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_33_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor 
per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 256])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 3, 512])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1972 = const()[name = string("op_1972"), val = tensor([0, 2, 1])]; + tensor input_59_axes_0 = const()[name = string("input_59_axes_0"), val = tensor([2])]; + tensor var_1973 = transpose(perm = var_1972, x = hidden_states_15_cast_fp16)[name = string("transpose_202")]; + tensor input_59 = expand_dims(axes = input_59_axes_0, x = var_1973)[name = string("input_59")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_59)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1992 = const()[name = string("op_1992"), val = tensor([0, 2, 1])]; + tensor 
per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1993_cast_fp16 = transpose(perm = var_1992, x = per_layer_slice_3_cast_fp16)[name = string("transpose_201")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1993_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_61_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_61_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550639680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550967424))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_2009_axes_0 = const()[name = string("op_2009_axes_0"), val = tensor([2])]; + tensor var_2009_cast_fp16 = squeeze(axes = var_2009_axes_0, x = gated_11_cast_fp16)[name = string("op_2009_cast_fp16")]; + tensor var_2013 = const()[name = string("op_2013"), val = tensor([0, 
2, 1])]; + int32 var_2019 = const()[name = string("op_2019"), val = int32(-1)]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_37_cast_fp16 = transpose(perm = var_2013, x = var_2009_cast_fp16)[name = string("transpose_200")]; + tensor var_2021_cast_fp16 = mul(x = x_37_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2021_cast_fp16")]; + bool input_63_interleave_0 = const()[name = string("input_63_interleave_0"), val = bool(false)]; + tensor input_63_cast_fp16 = concat(axis = var_2019, interleave = input_63_interleave_0, values = (x_37_cast_fp16, var_2021_cast_fp16))[name = string("input_63_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_2016_to_fp16 = const()[name = string("op_2016_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_2016_to_fp16, x = input_63_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_2026_split_sizes_0 = const()[name = string("op_2026_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2026_axis_0 = const()[name = string("op_2026_axis_0"), val = int32(-1)]; + tensor var_2026_cast_fp16_0, tensor var_2026_cast_fp16_1 = split(axis = var_2026_axis_0, split_sizes = var_2026_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_2026_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550970048)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_2026_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = 
string("hidden_states_21_cast_fp16")]; + tensor const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = tensor([0x1.48p-3])]; + tensor x_39_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_24_promoted_to_fp16)[name = string("x_39_cast_fp16")]; + int32 var_2041 = const()[name = string("op_2041"), val = int32(-1)]; + fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2043_cast_fp16 = mul(x = x_39_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2043_cast_fp16")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65_cast_fp16 = concat(axis = var_2041, interleave = input_65_interleave_0, values = (x_39_cast_fp16, var_2043_cast_fp16))[name = string("input_65_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_2038_to_fp16, x = input_65_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_2048_split_sizes_0 = const()[name = string("op_2048_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2048_axis_0 = const()[name = string("op_2048_axis_0"), val = int32(-1)]; + tensor var_2048_cast_fp16_0, tensor var_2048_cast_fp16_1 = split(axis = var_2048_axis_0, split_sizes = var_2048_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_2048_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550975232)))]; + tensor h_13_cast_fp16 = mul(x = var_2048_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_2054 = 
const()[name = string("op_2054"), val = tensor([0, 2, 1])]; + tensor var_2057_axes_0 = const()[name = string("op_2057_axes_0"), val = tensor([2])]; + tensor var_2055_cast_fp16 = transpose(perm = var_2054, x = h_13_cast_fp16)[name = string("transpose_199")]; + tensor var_2057_cast_fp16 = expand_dims(axes = var_2057_axes_0, x = var_2055_cast_fp16)[name = string("op_2057_cast_fp16")]; + string q_25_pad_type_0 = const()[name = string("q_25_pad_type_0"), val = string("valid")]; + tensor q_25_strides_0 = const()[name = string("q_25_strides_0"), val = tensor([1, 1])]; + tensor q_25_pad_0 = const()[name = string("q_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_25_dilations_0 = const()[name = string("q_25_dilations_0"), val = tensor([1, 1])]; + int32 q_25_groups_0 = const()[name = string("q_25_groups_0"), val = int32(1)]; + tensor q_25 = conv(dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_2057_cast_fp16)[name = string("q_25")]; + tensor var_2078 = const()[name = string("op_2078"), val = tensor([1, 8, 256, 3])]; + tensor var_2079 = reshape(shape = var_2078, x = q_25)[name = string("op_2079")]; + tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2102 = const()[name = string("op_2102"), val = tensor([3, 8, 256])]; + tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = var_2079)[name = string("transpose_198")]; + tensor x_41 = reshape(shape = var_2102, x = transpose_54)[name = string("x_41")]; + int32 var_2108 = const()[name = string("op_2108"), val = int32(-1)]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_2110 = mul(x = x_41, y = const_26_promoted)[name = string("op_2110")]; + bool input_69_interleave_0 = const()[name = string("input_69_interleave_0"), val = bool(false)]; + tensor input_69 = 
concat(axis = var_2108, interleave = input_69_interleave_0, values = (x_41, var_2110))[name = string("input_69")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_2105_to_fp16, x = input_69)[name = string("normed_65_cast_fp16")]; + tensor var_2115_split_sizes_0 = const()[name = string("op_2115_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2115_axis_0 = const()[name = string("op_2115_axis_0"), val = int32(-1)]; + tensor var_2115_0, tensor var_2115_1 = split(axis = var_2115_axis_0, split_sizes = var_2115_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_2115")]; + tensor q_29 = mul(x = var_2115_0, y = layers_2_self_attn_q_norm_weight)[name = string("q_29")]; + tensor var_2122 = const()[name = string("op_2122"), val = tensor([1, 3, 8, 256])]; + tensor var_2123 = reshape(shape = var_2122, x = q_29)[name = string("op_2123")]; + tensor var_2128 = const()[name = string("op_2128"), val = tensor([0, 2, 1, 3])]; + tensor q_31 = transpose(perm = var_2128, x = var_2123)[name = string("transpose_197")]; + tensor var_2130_cast_fp16 = mul(x = q_31, y = cos_s)[name = string("op_2130_cast_fp16")]; + tensor var_2131_split_sizes_0 = const()[name = string("op_2131_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2131_axis_0 = const()[name = string("op_2131_axis_0"), val = int32(-1)]; + tensor var_2131_0, tensor var_2131_1 = split(axis = var_2131_axis_0, split_sizes = var_2131_split_sizes_0, x = q_31)[name = string("op_2131")]; + fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; + tensor var_2133 = mul(x = var_2131_1, y = const_27_promoted)[name = string("op_2133")]; + int32 var_2135 = const()[name = string("op_2135"), val = int32(-1)]; + bool var_2136_interleave_0 = const()[name = 
string("op_2136_interleave_0"), val = bool(false)]; + tensor var_2136 = concat(axis = var_2135, interleave = var_2136_interleave_0, values = (var_2133, var_2131_0))[name = string("op_2136")]; + tensor var_2137_cast_fp16 = mul(x = var_2136, y = sin_s)[name = string("op_2137_cast_fp16")]; + tensor q_35_cast_fp16 = add(x = var_2130_cast_fp16, y = var_2137_cast_fp16)[name = string("q_35_cast_fp16")]; + string k_13_pad_type_0 = const()[name = string("k_13_pad_type_0"), val = string("valid")]; + tensor k_13_strides_0 = const()[name = string("k_13_strides_0"), val = tensor([1, 1])]; + tensor k_13_pad_0 = const()[name = string("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_13_dilations_0 = const()[name = string("k_13_dilations_0"), val = tensor([1, 1])]; + int32 k_13_groups_0 = const()[name = string("k_13_groups_0"), val = int32(1)]; + tensor k_13 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = var_2057_cast_fp16)[name = string("k_13")]; + tensor var_2155 = const()[name = string("op_2155"), val = tensor([1, 2, 256, 3])]; + tensor var_2156 = reshape(shape = var_2155, x = k_13)[name = string("op_2156")]; + tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_5_pad_type_0 = const()[name = string("v_5_pad_type_0"), val = string("valid")]; + tensor v_5_strides_0 = const()[name = string("v_5_strides_0"), val = tensor([1, 1])]; + tensor v_5_pad_0 = const()[name = string("v_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_5_dilations_0 = const()[name = string("v_5_dilations_0"), val = tensor([1, 1])]; + int32 v_5_groups_0 = const()[name = string("v_5_groups_0"), val = int32(1)]; + tensor v_5 = conv(dilations = v_5_dilations_0, groups = v_5_groups_0, pad = v_5_pad_0, pad_type = v_5_pad_type_0, strides = v_5_strides_0, weight = layers_2_self_attn_v_proj_weight_palettized, 
x = var_2057_cast_fp16)[name = string("v_5")]; + tensor var_2183 = const()[name = string("op_2183"), val = tensor([1, 2, 256, 3])]; + tensor var_2184 = reshape(shape = var_2183, x = v_5)[name = string("op_2184")]; + tensor var_2189 = const()[name = string("op_2189"), val = tensor([0, 1, 3, 2])]; + tensor var_2207 = const()[name = string("op_2207"), val = tensor([3, 2, 256])]; + tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = var_2156)[name = string("transpose_196")]; + tensor x_43 = reshape(shape = var_2207, x = transpose_55)[name = string("x_43")]; + int32 var_2213 = const()[name = string("op_2213"), val = int32(-1)]; + fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; + tensor var_2215 = mul(x = x_43, y = const_28_promoted)[name = string("op_2215")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71 = concat(axis = var_2213, interleave = input_71_interleave_0, values = (x_43, var_2215))[name = string("input_71")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_2210_to_fp16 = const()[name = string("op_2210_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2210_to_fp16, x = input_71)[name = string("normed_69_cast_fp16")]; + tensor var_2220_split_sizes_0 = const()[name = string("op_2220_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2220_axis_0 = const()[name = string("op_2220_axis_0"), val = int32(-1)]; + tensor var_2220_0, tensor var_2220_1 = split(axis = var_2220_axis_0, split_sizes = var_2220_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2220")]; + tensor k_17 = mul(x = var_2220_0, y = layers_2_self_attn_k_norm_weight)[name = string("k_17")]; + tensor var_2227 = const()[name = string("op_2227"), val = tensor([1, 3, 2, 256])]; + tensor var_2228 = reshape(shape = var_2227, x = k_17)[name = 
string("op_2228")]; + tensor var_2233 = const()[name = string("op_2233"), val = tensor([0, 2, 1, 3])]; + fp16 var_2235_promoted = const()[name = string("op_2235_promoted"), val = fp16(0x1p+1)]; + tensor var_2190 = transpose(perm = var_2189, x = var_2184)[name = string("transpose_195")]; + tensor var_2236 = pow(x = var_2190, y = var_2235_promoted)[name = string("op_2236")]; + tensor var_2241_axes_0 = const()[name = string("op_2241_axes_0"), val = tensor([-1])]; + bool var_2241_keep_dims_0 = const()[name = string("op_2241_keep_dims_0"), val = bool(true)]; + tensor var_2241 = reduce_mean(axes = var_2241_axes_0, keep_dims = var_2241_keep_dims_0, x = var_2236)[name = string("op_2241")]; + fp16 var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_5_cast_fp16 = add(x = var_2241, y = var_2243_to_fp16)[name = string("mean_sq_5_cast_fp16")]; + fp32 var_2245_epsilon_0 = const()[name = string("op_2245_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2245_cast_fp16 = rsqrt(epsilon = var_2245_epsilon_0, x = mean_sq_5_cast_fp16)[name = string("op_2245_cast_fp16")]; + tensor input_75_cast_fp16 = mul(x = var_2190, y = var_2245_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor q_33 = transpose(perm = var_2233, x = var_2228)[name = string("transpose_194")]; + tensor var_2247_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2247_cast_fp16")]; + tensor var_2248_split_sizes_0 = const()[name = string("op_2248_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2248_axis_0 = const()[name = string("op_2248_axis_0"), val = int32(-1)]; + tensor var_2248_0, tensor var_2248_1 = split(axis = var_2248_axis_0, split_sizes = var_2248_split_sizes_0, x = q_33)[name = string("op_2248")]; + fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor var_2250 = mul(x = var_2248_1, y = const_29_promoted)[name = string("op_2250")]; + int32 var_2252 = const()[name = string("op_2252"), val = 
int32(-1)]; + bool var_2253_interleave_0 = const()[name = string("op_2253_interleave_0"), val = bool(false)]; + tensor var_2253 = concat(axis = var_2252, interleave = var_2253_interleave_0, values = (var_2250, var_2248_0))[name = string("op_2253")]; + tensor var_2254_cast_fp16 = mul(x = var_2253, y = sin_s)[name = string("op_2254_cast_fp16")]; + tensor input_73_cast_fp16 = add(x = var_2247_cast_fp16, y = var_2254_cast_fp16)[name = string("input_73_cast_fp16")]; + tensor k_padded_5_pad_0 = const()[name = string("k_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; + fp16 const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_5_cast_fp16 = pad(constant_val = const_30_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_73_cast_fp16)[name = string("k_padded_5_cast_fp16")]; + tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; + fp16 const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_5_cast_fp16 = pad(constant_val = const_31_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = input_75_cast_fp16)[name = string("v_padded_5_cast_fp16")]; + tensor slot_k_5_begin_0 = const()[name = string("slot_k_5_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor slot_k_5_end_0 = const()[name = string("slot_k_5_end_0"), val = tensor([3, 2, 512, 512])]; + tensor slot_k_5_end_mask_0 = const()[name = string("slot_k_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_5_cast_fp16 = slice_by_index(begin = slot_k_5_begin_0, end = slot_k_5_end_0, end_mask = slot_k_5_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("slot_k_5_cast_fp16")]; + tensor slot_v_5_begin_0 = const()[name = 
string("slot_v_5_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor slot_v_5_end_0 = const()[name = string("slot_v_5_end_0"), val = tensor([3, 2, 512, 512])]; + tensor slot_v_5_end_mask_0 = const()[name = string("slot_v_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_5_cast_fp16 = slice_by_index(begin = slot_v_5_begin_0, end = slot_v_5_end_0, end_mask = slot_v_5_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("slot_v_5_cast_fp16")]; + tensor var_2293_begin_0 = const()[name = string("op_2293_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2293_end_0 = const()[name = string("op_2293_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2293_end_mask_0 = const()[name = string("op_2293_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2293_cast_fp16 = slice_by_index(begin = var_2293_begin_0, end = var_2293_end_0, end_mask = var_2293_end_mask_0, x = slot_k_5_cast_fp16)[name = string("op_2293_cast_fp16")]; + int32 var_2300 = const()[name = string("op_2300"), val = int32(2)]; + bool new_k_5_interleave_0 = const()[name = string("new_k_5_interleave_0"), val = bool(false)]; + tensor new_k_5_cast_fp16 = concat(axis = var_2300, interleave = new_k_5_interleave_0, values = (var_2293_cast_fp16, k_padded_5_cast_fp16))[name = string("new_k_5_cast_fp16")]; + tensor var_2316_begin_0 = const()[name = string("op_2316_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2316_end_0 = const()[name = string("op_2316_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2316_end_mask_0 = const()[name = string("op_2316_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2316_cast_fp16 = slice_by_index(begin = var_2316_begin_0, end = var_2316_end_0, end_mask = var_2316_end_mask_0, x = slot_v_5_cast_fp16)[name = string("op_2316_cast_fp16")]; + int32 var_2323 = const()[name = string("op_2323"), val = int32(2)]; + bool new_v_5_interleave_0 = const()[name = string("new_v_5_interleave_0"), val = bool(false)]; + tensor 
new_v_5_cast_fp16 = concat(axis = var_2323, interleave = new_v_5_interleave_0, values = (var_2316_cast_fp16, v_padded_5_cast_fp16))[name = string("new_v_5_cast_fp16")]; + tensor var_2329_begin_0 = const()[name = string("op_2329_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2329_end_0 = const()[name = string("op_2329_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_2329_end_mask_0 = const()[name = string("op_2329_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2329_cast_fp16 = slice_by_index(begin = var_2329_begin_0, end = var_2329_end_0, end_mask = var_2329_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("op_2329_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = string("op_2334_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2334_end_0 = const()[name = string("op_2334_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2334_end_mask_0 = const()[name = string("op_2334_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("op_2334_cast_fp16")]; + int32 var_2336 = const()[name = string("op_2336"), val = int32(0)]; + bool K_sliding_out_5_interleave_0 = const()[name = string("K_sliding_out_5_interleave_0"), val = bool(false)]; + tensor K_sliding_out_5_cast_fp16 = concat(axis = var_2336, interleave = K_sliding_out_5_interleave_0, values = (var_2329_cast_fp16, new_k_5_cast_fp16, var_2334_cast_fp16))[name = string("K_sliding_out_5_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = string("op_2342_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2342_end_0 = const()[name = string("op_2342_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_2342_end_mask_0 = const()[name = string("op_2342_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, 
end_mask = var_2342_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("op_2342_cast_fp16")]; + tensor var_2347_begin_0 = const()[name = string("op_2347_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2347_end_0 = const()[name = string("op_2347_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2347_end_mask_0 = const()[name = string("op_2347_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2347_cast_fp16 = slice_by_index(begin = var_2347_begin_0, end = var_2347_end_0, end_mask = var_2347_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("op_2347_cast_fp16")]; + int32 var_2349 = const()[name = string("op_2349"), val = int32(0)]; + bool V_sliding_out_5_interleave_0 = const()[name = string("V_sliding_out_5_interleave_0"), val = bool(false)]; + tensor V_sliding_out_5_cast_fp16 = concat(axis = var_2349, interleave = V_sliding_out_5_interleave_0, values = (var_2342_cast_fp16, new_v_5_cast_fp16, var_2347_cast_fp16))[name = string("V_sliding_out_5_cast_fp16")]; + tensor var_2355_begin_0 = const()[name = string("op_2355_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2355_end_0 = const()[name = string("op_2355_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2355_end_mask_0 = const()[name = string("op_2355_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2355_cast_fp16 = slice_by_index(begin = var_2355_begin_0, end = var_2355_end_0, end_mask = var_2355_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2355_cast_fp16")]; + tensor K_for_attn_5_begin_0 = const()[name = string("K_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_5_end_0 = const()[name = string("K_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_5_end_mask_0 = const()[name = string("K_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_5_cast_fp16 = slice_by_index(begin = K_for_attn_5_begin_0, end = K_for_attn_5_end_0, end_mask = 
K_for_attn_5_end_mask_0, x = var_2355_cast_fp16)[name = string("K_for_attn_5_cast_fp16")]; + tensor var_2365_begin_0 = const()[name = string("op_2365_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2365_end_0 = const()[name = string("op_2365_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2365_end_mask_0 = const()[name = string("op_2365_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2365_cast_fp16 = slice_by_index(begin = var_2365_begin_0, end = var_2365_end_0, end_mask = var_2365_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2365_cast_fp16")]; + tensor V_for_attn_5_begin_0 = const()[name = string("V_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_5_end_0 = const()[name = string("V_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_5_end_mask_0 = const()[name = string("V_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_5_cast_fp16 = slice_by_index(begin = V_for_attn_5_begin_0, end = V_for_attn_5_end_0, end_mask = V_for_attn_5_end_mask_0, x = var_2365_cast_fp16)[name = string("V_for_attn_5_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_for_attn_5_cast_fp16)[name = string("transpose_193")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 512, 
256])]; + tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_192")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_for_attn_5_cast_fp16)[name = string("transpose_191")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_190")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_189")]; + tensor attn_weights_9_cast_fp16 = 
matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_35_cast_fp16, y = transpose_56_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_47_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_47_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_47_cast_fp16)[name = string("reduce_max_2")]; + tensor var_2400 = sub(x = x_47_cast_fp16, y = reduce_max_2)[name = string("op_2400")]; + tensor var_2406 = exp(x = var_2400)[name = string("op_2406")]; + tensor var_2416_axes_0 = const()[name = string("op_2416_axes_0"), val = tensor([-1])]; + bool var_2416_keep_dims_0 = const()[name = string("op_2416_keep_dims_0"), val = bool(true)]; + tensor var_2416 = reduce_sum(axes = var_2416_axes_0, keep_dims = var_2416_keep_dims_0, x = var_2406)[name = string("op_2416")]; + tensor var_2422_cast_fp16 = real_div(x = var_2406, y = var_2416)[name = string("op_2422_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_188")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_2422_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_2433 = const()[name = string("op_2433"), val = tensor([0, 2, 1, 3])]; + tensor var_2440 = const()[name = string("op_2440"), val = tensor([1, 3, -1])]; + tensor 
var_2434_cast_fp16 = transpose(perm = var_2433, x = attn_output_13_cast_fp16)[name = string("transpose_187")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2440, x = var_2434_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2445 = const()[name = string("op_2445"), val = tensor([0, 2, 1])]; + string var_2461_pad_type_0 = const()[name = string("op_2461_pad_type_0"), val = string("valid")]; + int32 var_2461_groups_0 = const()[name = string("op_2461_groups_0"), val = int32(1)]; + tensor var_2461_strides_0 = const()[name = string("op_2461_strides_0"), val = tensor([1])]; + tensor var_2461_pad_0 = const()[name = string("op_2461_pad_0"), val = tensor([0, 0])]; + tensor var_2461_dilations_0 = const()[name = string("op_2461_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550980416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553601920))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2446_cast_fp16 = transpose(perm = var_2445, x = attn_output_15_cast_fp16)[name = string("transpose_186")]; + tensor var_2461_cast_fp16 = conv(dilations = var_2461_dilations_0, groups = var_2461_groups_0, pad = var_2461_pad_0, pad_type = var_2461_pad_type_0, strides = var_2461_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2446_cast_fp16)[name = string("op_2461_cast_fp16")]; + tensor var_2465 = const()[name = string("op_2465"), val = tensor([0, 2, 1])]; + int32 var_2471 = const()[name = string("op_2471"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_51_cast_fp16 = transpose(perm = var_2465, x = var_2461_cast_fp16)[name = string("transpose_185")]; + tensor var_2473_cast_fp16 = mul(x = x_51_cast_fp16, y = 
const_32_promoted_to_fp16)[name = string("op_2473_cast_fp16")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79_cast_fp16 = concat(axis = var_2471, interleave = input_79_interleave_0, values = (x_51_cast_fp16, var_2473_cast_fp16))[name = string("input_79_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_2468_to_fp16 = const()[name = string("op_2468_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2468_to_fp16, x = input_79_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_2478_split_sizes_0 = const()[name = string("op_2478_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2478_axis_0 = const()[name = string("op_2478_axis_0"), val = int32(-1)]; + tensor var_2478_cast_fp16_0, tensor var_2478_cast_fp16_1 = split(axis = var_2478_axis_0, split_sizes = var_2478_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2478_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553604544)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_2478_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_39_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_2487 = const()[name = string("op_2487"), val = int32(-1)]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2489_cast_fp16 = mul(x = x_53_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2489_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = 
bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_2487, interleave = input_81_interleave_0, values = (x_53_cast_fp16, var_2489_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_2484_to_fp16 = const()[name = string("op_2484_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2484_to_fp16, x = input_81_cast_fp16)[name = string("normed_77_cast_fp16")]; + tensor var_2494_split_sizes_0 = const()[name = string("op_2494_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2494_axis_0 = const()[name = string("op_2494_axis_0"), val = int32(-1)]; + tensor var_2494_cast_fp16_0, tensor var_2494_cast_fp16_1 = split(axis = var_2494_axis_0, split_sizes = var_2494_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2494_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553609728)))]; + tensor h_15_cast_fp16 = mul(x = var_2494_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_2505 = const()[name = string("op_2505"), val = tensor([0, 2, 1])]; + tensor input_83_axes_0 = const()[name = string("input_83_axes_0"), val = tensor([2])]; + tensor var_2506 = transpose(perm = var_2505, x = h_15_cast_fp16)[name = string("transpose_184")]; + tensor input_83 = expand_dims(axes = input_83_axes_0, x = var_2506)[name = string("input_83")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_83)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_83)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_85 = mul(x = gate_11, y = up_5)[name = string("input_85")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = 
mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_85)[name = string("mlp_out_5")]; + tensor var_2546_axes_0 = const()[name = string("op_2546_axes_0"), val = tensor([2])]; + tensor var_2546 = squeeze(axes = var_2546_axes_0, x = mlp_out_5)[name = string("op_2546")]; + tensor var_2550 = const()[name = string("op_2550"), val = tensor([0, 2, 1])]; + int32 var_2556 = const()[name = string("op_2556"), val = int32(-1)]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor x_55 = transpose(perm = var_2550, x = var_2546)[name = string("transpose_183")]; + tensor var_2558 = mul(x = x_55, y = const_34_promoted)[name = string("op_2558")]; + bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; + tensor input_87 = concat(axis = var_2556, interleave = input_87_interleave_0, values = (x_55, var_2558))[name = string("input_87")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_2553_to_fp16 = const()[name = string("op_2553_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2553_to_fp16, x = input_87)[name = string("normed_81_cast_fp16")]; + tensor var_2563_split_sizes_0 = const()[name = string("op_2563_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2563_axis_0 = const()[name = string("op_2563_axis_0"), val = int32(-1)]; + tensor var_2563_0, tensor var_2563_1 = split(axis = var_2563_axis_0, split_sizes = var_2563_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2563")]; + tensor hidden_states_23 = mul(x = var_2563_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = 
string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 512])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 3, 768])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_2591 = const()[name = string("op_2591"), val = tensor([0, 2, 1])]; + tensor input_89_axes_0 = const()[name = string("input_89_axes_0"), val = tensor([2])]; + tensor var_2592 = transpose(perm = var_2591, x = hidden_states_25_cast_fp16)[name = string("transpose_182")]; + tensor input_89 = expand_dims(axes = input_89_axes_0, x = var_2592)[name = string("input_89")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_89)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_2611 = const()[name = string("op_2611"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = 
const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_2612_cast_fp16 = transpose(perm = var_2611, x = per_layer_slice_5_cast_fp16)[name = string("transpose_181")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_2612_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_91_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_91_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553614912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553942656))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_91_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_2628_axes_0 = const()[name = string("op_2628_axes_0"), val = tensor([2])]; + tensor var_2628_cast_fp16 = squeeze(axes = var_2628_axes_0, x = gated_17_cast_fp16)[name = string("op_2628_cast_fp16")]; + tensor var_2632 = const()[name = string("op_2632"), val = tensor([0, 2, 1])]; + int32 var_2638 = 
const()[name = string("op_2638"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_57_cast_fp16 = transpose(perm = var_2632, x = var_2628_cast_fp16)[name = string("transpose_180")]; + tensor var_2640_cast_fp16 = mul(x = x_57_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2640_cast_fp16")]; + bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; + tensor input_93_cast_fp16 = concat(axis = var_2638, interleave = input_93_interleave_0, values = (x_57_cast_fp16, var_2640_cast_fp16))[name = string("input_93_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2635_to_fp16, x = input_93_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_2645_split_sizes_0 = const()[name = string("op_2645_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2645_axis_0 = const()[name = string("op_2645_axis_0"), val = int32(-1)]; + tensor var_2645_cast_fp16_0, tensor var_2645_cast_fp16_1 = split(axis = var_2645_axis_0, split_sizes = var_2645_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2645_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553945280)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_2645_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor 
const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = tensor([0x1.aep-1])]; + tensor x_59_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_36_promoted_to_fp16)[name = string("x_59_cast_fp16")]; + int32 var_2660 = const()[name = string("op_2660"), val = int32(-1)]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2662_cast_fp16 = mul(x = x_59_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_2662_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95_cast_fp16 = concat(axis = var_2660, interleave = input_95_interleave_0, values = (x_59_cast_fp16, var_2662_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2657_to_fp16, x = input_95_cast_fp16)[name = string("normed_89_cast_fp16")]; + tensor var_2667_split_sizes_0 = const()[name = string("op_2667_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2667_axis_0 = const()[name = string("op_2667_axis_0"), val = int32(-1)]; + tensor var_2667_cast_fp16_0, tensor var_2667_cast_fp16_1 = split(axis = var_2667_axis_0, split_sizes = var_2667_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2667_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553950464)))]; + tensor h_19_cast_fp16 = mul(x = var_2667_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_2673 = const()[name = string("op_2673"), val = tensor([0, 2, 
1])]; + tensor var_2676_axes_0 = const()[name = string("op_2676_axes_0"), val = tensor([2])]; + tensor var_2674_cast_fp16 = transpose(perm = var_2673, x = h_19_cast_fp16)[name = string("transpose_179")]; + tensor var_2676_cast_fp16 = expand_dims(axes = var_2676_axes_0, x = var_2674_cast_fp16)[name = string("op_2676_cast_fp16")]; + string q_37_pad_type_0 = const()[name = string("q_37_pad_type_0"), val = string("valid")]; + tensor q_37_strides_0 = const()[name = string("q_37_strides_0"), val = tensor([1, 1])]; + tensor q_37_pad_0 = const()[name = string("q_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_37_dilations_0 = const()[name = string("q_37_dilations_0"), val = tensor([1, 1])]; + int32 q_37_groups_0 = const()[name = string("q_37_groups_0"), val = int32(1)]; + tensor q_37 = conv(dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_2676_cast_fp16)[name = string("q_37")]; + tensor var_2697 = const()[name = string("op_2697"), val = tensor([1, 8, 256, 3])]; + tensor var_2698 = reshape(shape = var_2697, x = q_37)[name = string("op_2698")]; + tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2721 = const()[name = string("op_2721"), val = tensor([3, 8, 256])]; + tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = var_2698)[name = string("transpose_178")]; + tensor x_61 = reshape(shape = var_2721, x = transpose_57)[name = string("x_61")]; + int32 var_2727 = const()[name = string("op_2727"), val = int32(-1)]; + fp16 const_38_promoted = const()[name = string("const_38_promoted"), val = fp16(-0x1p+0)]; + tensor var_2729 = mul(x = x_61, y = const_38_promoted)[name = string("op_2729")]; + bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; + tensor input_99 = concat(axis = var_2727, interleave = 
input_99_interleave_0, values = (x_61, var_2729))[name = string("input_99")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_2724_to_fp16 = const()[name = string("op_2724_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2724_to_fp16, x = input_99)[name = string("normed_93_cast_fp16")]; + tensor var_2734_split_sizes_0 = const()[name = string("op_2734_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2734_axis_0 = const()[name = string("op_2734_axis_0"), val = int32(-1)]; + tensor var_2734_0, tensor var_2734_1 = split(axis = var_2734_axis_0, split_sizes = var_2734_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2734")]; + tensor q_41 = mul(x = var_2734_0, y = layers_3_self_attn_q_norm_weight)[name = string("q_41")]; + tensor var_2741 = const()[name = string("op_2741"), val = tensor([1, 3, 8, 256])]; + tensor var_2742 = reshape(shape = var_2741, x = q_41)[name = string("op_2742")]; + tensor var_2747 = const()[name = string("op_2747"), val = tensor([0, 2, 1, 3])]; + tensor q_43 = transpose(perm = var_2747, x = var_2742)[name = string("transpose_177")]; + tensor var_2749_cast_fp16 = mul(x = q_43, y = cos_s)[name = string("op_2749_cast_fp16")]; + tensor var_2750_split_sizes_0 = const()[name = string("op_2750_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2750_axis_0 = const()[name = string("op_2750_axis_0"), val = int32(-1)]; + tensor var_2750_0, tensor var_2750_1 = split(axis = var_2750_axis_0, split_sizes = var_2750_split_sizes_0, x = q_43)[name = string("op_2750")]; + fp16 const_39_promoted = const()[name = string("const_39_promoted"), val = fp16(-0x1p+0)]; + tensor var_2752 = mul(x = var_2750_1, y = const_39_promoted)[name = string("op_2752")]; + int32 var_2754 = const()[name = string("op_2754"), val = int32(-1)]; + bool var_2755_interleave_0 = const()[name = string("op_2755_interleave_0"), val = bool(false)]; + tensor 
var_2755 = concat(axis = var_2754, interleave = var_2755_interleave_0, values = (var_2752, var_2750_0))[name = string("op_2755")]; + tensor var_2756_cast_fp16 = mul(x = var_2755, y = sin_s)[name = string("op_2756_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_2749_cast_fp16, y = var_2756_cast_fp16)[name = string("q_47_cast_fp16")]; + string k_19_pad_type_0 = const()[name = string("k_19_pad_type_0"), val = string("valid")]; + tensor k_19_strides_0 = const()[name = string("k_19_strides_0"), val = tensor([1, 1])]; + tensor k_19_pad_0 = const()[name = string("k_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_19_dilations_0 = const()[name = string("k_19_dilations_0"), val = tensor([1, 1])]; + int32 k_19_groups_0 = const()[name = string("k_19_groups_0"), val = int32(1)]; + tensor k_19 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = var_2676_cast_fp16)[name = string("k_19")]; + tensor var_2774 = const()[name = string("op_2774"), val = tensor([1, 2, 256, 3])]; + tensor var_2775 = reshape(shape = var_2774, x = k_19)[name = string("op_2775")]; + tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_7_pad_type_0 = const()[name = string("v_7_pad_type_0"), val = string("valid")]; + tensor v_7_strides_0 = const()[name = string("v_7_strides_0"), val = tensor([1, 1])]; + tensor v_7_pad_0 = const()[name = string("v_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_7_dilations_0 = const()[name = string("v_7_dilations_0"), val = tensor([1, 1])]; + int32 v_7_groups_0 = const()[name = string("v_7_groups_0"), val = int32(1)]; + tensor v_7 = conv(dilations = v_7_dilations_0, groups = v_7_groups_0, pad = v_7_pad_0, pad_type = v_7_pad_type_0, strides = v_7_strides_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = var_2676_cast_fp16)[name = string("v_7")]; + tensor 
var_2802 = const()[name = string("op_2802"), val = tensor([1, 2, 256, 3])]; + tensor var_2803 = reshape(shape = var_2802, x = v_7)[name = string("op_2803")]; + tensor var_2808 = const()[name = string("op_2808"), val = tensor([0, 1, 3, 2])]; + tensor var_2826 = const()[name = string("op_2826"), val = tensor([3, 2, 256])]; + tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = var_2775)[name = string("transpose_176")]; + tensor x_63 = reshape(shape = var_2826, x = transpose_58)[name = string("x_63")]; + int32 var_2832 = const()[name = string("op_2832"), val = int32(-1)]; + fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)]; + tensor var_2834 = mul(x = x_63, y = const_40_promoted)[name = string("op_2834")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2832, interleave = input_101_interleave_0, values = (x_63, var_2834))[name = string("input_101")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_2829_to_fp16 = const()[name = string("op_2829_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2829_to_fp16, x = input_101)[name = string("normed_97_cast_fp16")]; + tensor var_2839_split_sizes_0 = const()[name = string("op_2839_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2839_axis_0 = const()[name = string("op_2839_axis_0"), val = int32(-1)]; + tensor var_2839_0, tensor var_2839_1 = split(axis = var_2839_axis_0, split_sizes = var_2839_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2839")]; + tensor k_23 = mul(x = var_2839_0, y = layers_3_self_attn_k_norm_weight)[name = string("k_23")]; + tensor var_2846 = const()[name = string("op_2846"), val = tensor([1, 3, 2, 256])]; + tensor var_2847 = reshape(shape = var_2846, x = k_23)[name = string("op_2847")]; + tensor var_2852 = const()[name = 
string("op_2852"), val = tensor([0, 2, 1, 3])]; + fp16 var_2854_promoted = const()[name = string("op_2854_promoted"), val = fp16(0x1p+1)]; + tensor var_2809 = transpose(perm = var_2808, x = var_2803)[name = string("transpose_175")]; + tensor var_2855 = pow(x = var_2809, y = var_2854_promoted)[name = string("op_2855")]; + tensor var_2860_axes_0 = const()[name = string("op_2860_axes_0"), val = tensor([-1])]; + bool var_2860_keep_dims_0 = const()[name = string("op_2860_keep_dims_0"), val = bool(true)]; + tensor var_2860 = reduce_mean(axes = var_2860_axes_0, keep_dims = var_2860_keep_dims_0, x = var_2855)[name = string("op_2860")]; + fp16 var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_7_cast_fp16 = add(x = var_2860, y = var_2862_to_fp16)[name = string("mean_sq_7_cast_fp16")]; + fp32 var_2864_epsilon_0 = const()[name = string("op_2864_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2864_cast_fp16 = rsqrt(epsilon = var_2864_epsilon_0, x = mean_sq_7_cast_fp16)[name = string("op_2864_cast_fp16")]; + tensor input_105_cast_fp16 = mul(x = var_2809, y = var_2864_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor q_45 = transpose(perm = var_2852, x = var_2847)[name = string("transpose_174")]; + tensor var_2866_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_2866_cast_fp16")]; + tensor var_2867_split_sizes_0 = const()[name = string("op_2867_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2867_axis_0 = const()[name = string("op_2867_axis_0"), val = int32(-1)]; + tensor var_2867_0, tensor var_2867_1 = split(axis = var_2867_axis_0, split_sizes = var_2867_split_sizes_0, x = q_45)[name = string("op_2867")]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2869 = mul(x = var_2867_1, y = const_41_promoted)[name = string("op_2869")]; + int32 var_2871 = const()[name = string("op_2871"), val = int32(-1)]; + bool var_2872_interleave_0 = const()[name 
= string("op_2872_interleave_0"), val = bool(false)]; + tensor var_2872 = concat(axis = var_2871, interleave = var_2872_interleave_0, values = (var_2869, var_2867_0))[name = string("op_2872")]; + tensor var_2873_cast_fp16 = mul(x = var_2872, y = sin_s)[name = string("op_2873_cast_fp16")]; + tensor input_103_cast_fp16 = add(x = var_2866_cast_fp16, y = var_2873_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; + fp16 const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_7_cast_fp16 = pad(constant_val = const_42_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = input_103_cast_fp16)[name = string("k_padded_7_cast_fp16")]; + tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; + fp16 const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_7_cast_fp16 = pad(constant_val = const_43_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = input_105_cast_fp16)[name = string("v_padded_7_cast_fp16")]; + tensor slot_k_7_begin_0 = const()[name = string("slot_k_7_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor slot_k_7_end_0 = const()[name = string("slot_k_7_end_0"), val = tensor([4, 2, 512, 512])]; + tensor slot_k_7_end_mask_0 = const()[name = string("slot_k_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_7_cast_fp16 = slice_by_index(begin = slot_k_7_begin_0, end = slot_k_7_end_0, end_mask = slot_k_7_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("slot_k_7_cast_fp16")]; + tensor slot_v_7_begin_0 = const()[name = string("slot_v_7_begin_0"), val = tensor([3, 0, 0, 
0])]; + tensor slot_v_7_end_0 = const()[name = string("slot_v_7_end_0"), val = tensor([4, 2, 512, 512])]; + tensor slot_v_7_end_mask_0 = const()[name = string("slot_v_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_7_cast_fp16 = slice_by_index(begin = slot_v_7_begin_0, end = slot_v_7_end_0, end_mask = slot_v_7_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("slot_v_7_cast_fp16")]; + tensor var_2912_begin_0 = const()[name = string("op_2912_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2912_end_0 = const()[name = string("op_2912_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2912_end_mask_0 = const()[name = string("op_2912_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2912_cast_fp16 = slice_by_index(begin = var_2912_begin_0, end = var_2912_end_0, end_mask = var_2912_end_mask_0, x = slot_k_7_cast_fp16)[name = string("op_2912_cast_fp16")]; + int32 var_2919 = const()[name = string("op_2919"), val = int32(2)]; + bool new_k_7_interleave_0 = const()[name = string("new_k_7_interleave_0"), val = bool(false)]; + tensor new_k_7_cast_fp16 = concat(axis = var_2919, interleave = new_k_7_interleave_0, values = (var_2912_cast_fp16, k_padded_7_cast_fp16))[name = string("new_k_7_cast_fp16")]; + tensor var_2935_begin_0 = const()[name = string("op_2935_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2935_end_0 = const()[name = string("op_2935_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2935_end_mask_0 = const()[name = string("op_2935_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2935_cast_fp16 = slice_by_index(begin = var_2935_begin_0, end = var_2935_end_0, end_mask = var_2935_end_mask_0, x = slot_v_7_cast_fp16)[name = string("op_2935_cast_fp16")]; + int32 var_2942 = const()[name = string("op_2942"), val = int32(2)]; + bool new_v_7_interleave_0 = const()[name = string("new_v_7_interleave_0"), val = bool(false)]; + tensor new_v_7_cast_fp16 = concat(axis = var_2942, interleave = 
new_v_7_interleave_0, values = (var_2935_cast_fp16, v_padded_7_cast_fp16))[name = string("new_v_7_cast_fp16")]; + tensor var_2948_begin_0 = const()[name = string("op_2948_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2948_end_0 = const()[name = string("op_2948_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2948_end_mask_0 = const()[name = string("op_2948_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2948_cast_fp16 = slice_by_index(begin = var_2948_begin_0, end = var_2948_end_0, end_mask = var_2948_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2948_cast_fp16")]; + tensor var_2953_begin_0 = const()[name = string("op_2953_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_2953_end_0 = const()[name = string("op_2953_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2953_end_mask_0 = const()[name = string("op_2953_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2953_cast_fp16 = slice_by_index(begin = var_2953_begin_0, end = var_2953_end_0, end_mask = var_2953_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2953_cast_fp16")]; + int32 var_2955 = const()[name = string("op_2955"), val = int32(0)]; + bool K_sliding_out_7_interleave_0 = const()[name = string("K_sliding_out_7_interleave_0"), val = bool(false)]; + tensor K_sliding_out_7_cast_fp16 = concat(axis = var_2955, interleave = K_sliding_out_7_interleave_0, values = (var_2948_cast_fp16, new_k_7_cast_fp16, var_2953_cast_fp16))[name = string("K_sliding_out_7_cast_fp16")]; + tensor var_2961_begin_0 = const()[name = string("op_2961_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2961_end_0 = const()[name = string("op_2961_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2961_end_mask_0 = const()[name = string("op_2961_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2961_cast_fp16 = slice_by_index(begin = var_2961_begin_0, end = var_2961_end_0, end_mask = var_2961_end_mask_0, x = 
V_sliding_out_5_cast_fp16)[name = string("op_2961_cast_fp16")]; + tensor var_2966_begin_0 = const()[name = string("op_2966_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_2966_end_0 = const()[name = string("op_2966_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2966_end_mask_0 = const()[name = string("op_2966_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2966_cast_fp16 = slice_by_index(begin = var_2966_begin_0, end = var_2966_end_0, end_mask = var_2966_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2966_cast_fp16")]; + int32 var_2968 = const()[name = string("op_2968"), val = int32(0)]; + bool V_sliding_out_7_interleave_0 = const()[name = string("V_sliding_out_7_interleave_0"), val = bool(false)]; + tensor V_sliding_out_7_cast_fp16 = concat(axis = var_2968, interleave = V_sliding_out_7_interleave_0, values = (var_2961_cast_fp16, new_v_7_cast_fp16, var_2966_cast_fp16))[name = string("V_sliding_out_7_cast_fp16")]; + tensor var_2974_begin_0 = const()[name = string("op_2974_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2974_end_0 = const()[name = string("op_2974_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2974_end_mask_0 = const()[name = string("op_2974_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2974_cast_fp16 = slice_by_index(begin = var_2974_begin_0, end = var_2974_end_0, end_mask = var_2974_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("op_2974_cast_fp16")]; + tensor K_for_attn_7_begin_0 = const()[name = string("K_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_7_end_0 = const()[name = string("K_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_7_end_mask_0 = const()[name = string("K_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_7_cast_fp16 = slice_by_index(begin = K_for_attn_7_begin_0, end = K_for_attn_7_end_0, end_mask = K_for_attn_7_end_mask_0, x = var_2974_cast_fp16)[name = 
string("K_for_attn_7_cast_fp16")]; + tensor var_2984_begin_0 = const()[name = string("op_2984_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2984_end_0 = const()[name = string("op_2984_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2984_end_mask_0 = const()[name = string("op_2984_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2984_cast_fp16 = slice_by_index(begin = var_2984_begin_0, end = var_2984_end_0, end_mask = var_2984_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("op_2984_cast_fp16")]; + tensor V_for_attn_7_begin_0 = const()[name = string("V_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_7_end_0 = const()[name = string("V_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_7_end_mask_0 = const()[name = string("V_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_7_cast_fp16 = slice_by_index(begin = V_for_attn_7_begin_0, end = V_for_attn_7_end_0, end_mask = V_for_attn_7_end_mask_0, x = var_2984_cast_fp16)[name = string("V_for_attn_7_cast_fp16")]; + tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_for_attn_7_cast_fp16)[name = string("transpose_173")]; + tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_12, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_13_cast_fp16 = 
transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_172")]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_13, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; + tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_for_attn_7_cast_fp16)[name = string("transpose_171")]; + tensor tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_14, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_170")]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_15, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor V_expanded_7_perm_0 = const()[name = string("V_expanded_7_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_169")]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = 
attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_47_cast_fp16, y = transpose_59_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_3")]; + tensor var_3019 = sub(x = x_67_cast_fp16, y = reduce_max_3)[name = string("op_3019")]; + tensor var_3025 = exp(x = var_3019)[name = string("op_3025")]; + tensor var_3035_axes_0 = const()[name = string("op_3035_axes_0"), val = tensor([-1])]; + bool var_3035_keep_dims_0 = const()[name = string("op_3035_keep_dims_0"), val = bool(true)]; + tensor var_3035 = reduce_sum(axes = var_3035_axes_0, keep_dims = var_3035_keep_dims_0, x = var_3025)[name = string("op_3035")]; + tensor var_3041_cast_fp16 = real_div(x = var_3025, y = var_3035)[name = string("op_3041_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor V_expanded_7_cast_fp16 = transpose(perm = V_expanded_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_168")]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_3041_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_3052 = const()[name = string("op_3052"), val = tensor([0, 2, 1, 3])]; + tensor var_3059 = const()[name = string("op_3059"), val = tensor([1, 3, -1])]; + tensor var_3053_cast_fp16 = 
transpose(perm = var_3052, x = attn_output_19_cast_fp16)[name = string("transpose_167")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_3059, x = var_3053_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_3064 = const()[name = string("op_3064"), val = tensor([0, 2, 1])]; + string var_3080_pad_type_0 = const()[name = string("op_3080_pad_type_0"), val = string("valid")]; + int32 var_3080_groups_0 = const()[name = string("op_3080_groups_0"), val = int32(1)]; + tensor var_3080_strides_0 = const()[name = string("op_3080_strides_0"), val = tensor([1])]; + tensor var_3080_pad_0 = const()[name = string("op_3080_pad_0"), val = tensor([0, 0])]; + tensor var_3080_dilations_0 = const()[name = string("op_3080_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553955648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556577152))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3065_cast_fp16 = transpose(perm = var_3064, x = attn_output_21_cast_fp16)[name = string("transpose_166")]; + tensor var_3080_cast_fp16 = conv(dilations = var_3080_dilations_0, groups = var_3080_groups_0, pad = var_3080_pad_0, pad_type = var_3080_pad_type_0, strides = var_3080_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3065_cast_fp16)[name = string("op_3080_cast_fp16")]; + tensor var_3084 = const()[name = string("op_3084"), val = tensor([0, 2, 1])]; + int32 var_3090 = const()[name = string("op_3090"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_3084, x = var_3080_cast_fp16)[name = string("transpose_165")]; + tensor var_3092_cast_fp16 = mul(x = x_71_cast_fp16, y = 
const_44_promoted_to_fp16)[name = string("op_3092_cast_fp16")]; + bool input_109_interleave_0 = const()[name = string("input_109_interleave_0"), val = bool(false)]; + tensor input_109_cast_fp16 = concat(axis = var_3090, interleave = input_109_interleave_0, values = (x_71_cast_fp16, var_3092_cast_fp16))[name = string("input_109_cast_fp16")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_3087_to_fp16 = const()[name = string("op_3087_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_3087_to_fp16, x = input_109_cast_fp16)[name = string("normed_101_cast_fp16")]; + tensor var_3097_split_sizes_0 = const()[name = string("op_3097_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3097_axis_0 = const()[name = string("op_3097_axis_0"), val = int32(-1)]; + tensor var_3097_cast_fp16_0, tensor var_3097_cast_fp16_1 = split(axis = var_3097_axis_0, split_sizes = var_3097_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_3097_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556579776)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_3097_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_59_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_3106 = const()[name = string("op_3106"), val = int32(-1)]; + fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3108_cast_fp16 = mul(x = x_73_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_3108_cast_fp16")]; + bool input_111_interleave_0 = const()[name = 
string("input_111_interleave_0"), val = bool(false)]; + tensor input_111_cast_fp16 = concat(axis = var_3106, interleave = input_111_interleave_0, values = (x_73_cast_fp16, var_3108_cast_fp16))[name = string("input_111_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_3103_to_fp16 = const()[name = string("op_3103_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_3103_to_fp16, x = input_111_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor var_3113_split_sizes_0 = const()[name = string("op_3113_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3113_axis_0 = const()[name = string("op_3113_axis_0"), val = int32(-1)]; + tensor var_3113_cast_fp16_0, tensor var_3113_cast_fp16_1 = split(axis = var_3113_axis_0, split_sizes = var_3113_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_3113_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556584960)))]; + tensor h_21_cast_fp16 = mul(x = var_3113_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_3124 = const()[name = string("op_3124"), val = tensor([0, 2, 1])]; + tensor input_113_axes_0 = const()[name = string("input_113_axes_0"), val = tensor([2])]; + tensor var_3125 = transpose(perm = var_3124, x = h_21_cast_fp16)[name = string("transpose_164")]; + tensor input_113 = expand_dims(axes = input_113_axes_0, x = var_3125)[name = string("input_113")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = 
string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_113)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_113)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_115 = mul(x = gate_15, y = up_7)[name = string("input_115")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = 
mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_115)[name = string("mlp_out_7")]; + tensor var_3165_axes_0 = const()[name = string("op_3165_axes_0"), val = tensor([2])]; + tensor var_3165 = squeeze(axes = var_3165_axes_0, x = mlp_out_7)[name = string("op_3165")]; + tensor var_3169 = const()[name = string("op_3169"), val = tensor([0, 2, 1])]; + int32 var_3175 = const()[name = string("op_3175"), val = int32(-1)]; + fp16 const_46_promoted = const()[name = string("const_46_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_3169, x = var_3165)[name = string("transpose_163")]; + tensor var_3177 = mul(x = x_75, y = const_46_promoted)[name = string("op_3177")]; + bool input_117_interleave_0 = const()[name = string("input_117_interleave_0"), val = bool(false)]; + tensor input_117 = concat(axis = var_3175, interleave = input_117_interleave_0, values = (x_75, var_3177))[name = string("input_117")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_3172_to_fp16, x = input_117)[name = string("normed_109_cast_fp16")]; + tensor var_3182_split_sizes_0 = const()[name = string("op_3182_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3182_axis_0 = const()[name = string("op_3182_axis_0"), val = int32(-1)]; + tensor var_3182_0, tensor var_3182_1 = split(axis = var_3182_axis_0, split_sizes = var_3182_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_3182")]; + tensor hidden_states_33 = mul(x = var_3182_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_33)[name = 
string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 768])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 3, 1024])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_3210 = const()[name = string("op_3210"), val = tensor([0, 2, 1])]; + tensor input_119_axes_0 = const()[name = string("input_119_axes_0"), val = tensor([2])]; + tensor var_3211 = transpose(perm = var_3210, x = hidden_states_35_cast_fp16)[name = string("transpose_162")]; + tensor input_119 = expand_dims(axes = input_119_axes_0, x = var_3211)[name = string("input_119")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_119)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_3230 = 
const()[name = string("op_3230"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_3231_cast_fp16 = transpose(perm = var_3230, x = per_layer_slice_7_cast_fp16)[name = string("transpose_161")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_3231_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_121_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_121_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556590144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556917888))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_121_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_3247_axes_0 = const()[name = string("op_3247_axes_0"), val = tensor([2])]; + tensor var_3247_cast_fp16 = squeeze(axes = var_3247_axes_0, x = gated_23_cast_fp16)[name = 
string("op_3247_cast_fp16")]; + tensor var_3251 = const()[name = string("op_3251"), val = tensor([0, 2, 1])]; + int32 var_3257 = const()[name = string("op_3257"), val = int32(-1)]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_3251, x = var_3247_cast_fp16)[name = string("transpose_160")]; + tensor var_3259_cast_fp16 = mul(x = x_77_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_3259_cast_fp16")]; + bool input_123_interleave_0 = const()[name = string("input_123_interleave_0"), val = bool(false)]; + tensor input_123_cast_fp16 = concat(axis = var_3257, interleave = input_123_interleave_0, values = (x_77_cast_fp16, var_3259_cast_fp16))[name = string("input_123_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_3254_to_fp16 = const()[name = string("op_3254_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_3254_to_fp16, x = input_123_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor var_3264_split_sizes_0 = const()[name = string("op_3264_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3264_axis_0 = const()[name = string("op_3264_axis_0"), val = int32(-1)]; + tensor var_3264_cast_fp16_0, tensor var_3264_cast_fp16_1 = split(axis = var_3264_axis_0, split_sizes = var_3264_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_3264_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556920512)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_3264_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor 
hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_48_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_3279 = const()[name = string("op_3279"), val = int32(-1)]; + fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3281_cast_fp16 = mul(x = x_79_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_3281_cast_fp16")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125_cast_fp16 = concat(axis = var_3279, interleave = input_125_interleave_0, values = (x_79_cast_fp16, var_3281_cast_fp16))[name = string("input_125_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_3276_to_fp16 = const()[name = string("op_3276_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_3276_to_fp16, x = input_125_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_3286_split_sizes_0 = const()[name = string("op_3286_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3286_axis_0 = const()[name = string("op_3286_axis_0"), val = int32(-1)]; + tensor var_3286_cast_fp16_0, tensor var_3286_cast_fp16_1 = split(axis = var_3286_axis_0, split_sizes = var_3286_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_3286_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556925696)))]; + tensor h_25_cast_fp16 = mul(x = var_3286_cast_fp16_0, y 
= layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_3292 = const()[name = string("op_3292"), val = tensor([0, 2, 1])]; + tensor var_3295_axes_0 = const()[name = string("op_3295_axes_0"), val = tensor([2])]; + tensor var_3293_cast_fp16 = transpose(perm = var_3292, x = h_25_cast_fp16)[name = string("transpose_159")]; + tensor var_3295_cast_fp16 = expand_dims(axes = var_3295_axes_0, x = var_3293_cast_fp16)[name = string("op_3295_cast_fp16")]; + string q_49_pad_type_0 = const()[name = string("q_49_pad_type_0"), val = string("valid")]; + tensor q_49_strides_0 = const()[name = string("q_49_strides_0"), val = tensor([1, 1])]; + tensor q_49_pad_0 = const()[name = string("q_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_49_dilations_0 = const()[name = string("q_49_dilations_0"), val = tensor([1, 1])]; + int32 q_49_groups_0 = const()[name = string("q_49_groups_0"), val = int32(1)]; + tensor q_49 = conv(dilations = q_49_dilations_0, groups = q_49_groups_0, pad = q_49_pad_0, pad_type = q_49_pad_type_0, strides = q_49_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_3295_cast_fp16)[name = string("q_49")]; + tensor var_3316 = const()[name = string("op_3316"), val = tensor([1, 8, 256, 3])]; + tensor var_3317 = reshape(shape = var_3316, x = q_49)[name = string("op_3317")]; + tensor transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3340 = const()[name = string("op_3340"), val = tensor([3, 8, 256])]; + tensor transpose_60 = transpose(perm = transpose_60_perm_0, x = var_3317)[name = string("transpose_158")]; + tensor x_81 = reshape(shape = var_3340, x = transpose_60)[name = string("x_81")]; + int32 var_3346 = const()[name = string("op_3346"), val = int32(-1)]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_3348 = mul(x = x_81, y = const_50_promoted)[name = string("op_3348")]; + bool 
input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129 = concat(axis = var_3346, interleave = input_129_interleave_0, values = (x_81, var_3348))[name = string("input_129")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_3343_to_fp16 = const()[name = string("op_3343_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_3343_to_fp16, x = input_129)[name = string("normed_121_cast_fp16")]; + tensor var_3353_split_sizes_0 = const()[name = string("op_3353_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3353_axis_0 = const()[name = string("op_3353_axis_0"), val = int32(-1)]; + tensor var_3353_0, tensor var_3353_1 = split(axis = var_3353_axis_0, split_sizes = var_3353_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_3353")]; + tensor q_53 = mul(x = var_3353_0, y = layers_4_self_attn_q_norm_weight)[name = string("q_53")]; + tensor var_3360 = const()[name = string("op_3360"), val = tensor([1, 3, 8, 256])]; + tensor var_3361 = reshape(shape = var_3360, x = q_53)[name = string("op_3361")]; + tensor var_3366 = const()[name = string("op_3366"), val = tensor([0, 2, 1, 3])]; + tensor q_55 = transpose(perm = var_3366, x = var_3361)[name = string("transpose_157")]; + tensor var_3368_cast_fp16 = mul(x = q_55, y = cos_s)[name = string("op_3368_cast_fp16")]; + tensor var_3369_split_sizes_0 = const()[name = string("op_3369_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3369_axis_0 = const()[name = string("op_3369_axis_0"), val = int32(-1)]; + tensor var_3369_0, tensor var_3369_1 = split(axis = var_3369_axis_0, split_sizes = var_3369_split_sizes_0, x = q_55)[name = string("op_3369")]; + fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; + tensor var_3371 = mul(x = var_3369_1, y = const_51_promoted)[name = string("op_3371")]; + int32 var_3373 = 
const()[name = string("op_3373"), val = int32(-1)]; + bool var_3374_interleave_0 = const()[name = string("op_3374_interleave_0"), val = bool(false)]; + tensor var_3374 = concat(axis = var_3373, interleave = var_3374_interleave_0, values = (var_3371, var_3369_0))[name = string("op_3374")]; + tensor var_3375_cast_fp16 = mul(x = var_3374, y = sin_s)[name = string("op_3375_cast_fp16")]; + tensor q_59_cast_fp16 = add(x = var_3368_cast_fp16, y = var_3375_cast_fp16)[name = string("q_59_cast_fp16")]; + string k_25_pad_type_0 = const()[name = string("k_25_pad_type_0"), val = string("valid")]; + tensor k_25_strides_0 = const()[name = string("k_25_strides_0"), val = tensor([1, 1])]; + tensor k_25_pad_0 = const()[name = string("k_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_25_dilations_0 = const()[name = string("k_25_dilations_0"), val = tensor([1, 1])]; + int32 k_25_groups_0 = const()[name = string("k_25_groups_0"), val = int32(1)]; + tensor k_25 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = var_3295_cast_fp16)[name = string("k_25")]; + tensor var_3393 = const()[name = string("op_3393"), val = tensor([1, 2, 256, 3])]; + tensor var_3394 = reshape(shape = var_3393, x = k_25)[name = string("op_3394")]; + tensor transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_9_pad_type_0 = const()[name = string("v_9_pad_type_0"), val = string("valid")]; + tensor v_9_strides_0 = const()[name = string("v_9_strides_0"), val = tensor([1, 1])]; + tensor v_9_pad_0 = const()[name = string("v_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_9_dilations_0 = const()[name = string("v_9_dilations_0"), val = tensor([1, 1])]; + int32 v_9_groups_0 = const()[name = string("v_9_groups_0"), val = int32(1)]; + tensor v_9 = conv(dilations = v_9_dilations_0, groups = v_9_groups_0, pad = v_9_pad_0, 
pad_type = v_9_pad_type_0, strides = v_9_strides_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = var_3295_cast_fp16)[name = string("v_9")]; + tensor var_3421 = const()[name = string("op_3421"), val = tensor([1, 2, 256, 3])]; + tensor var_3422 = reshape(shape = var_3421, x = v_9)[name = string("op_3422")]; + tensor var_3427 = const()[name = string("op_3427"), val = tensor([0, 1, 3, 2])]; + tensor var_3445 = const()[name = string("op_3445"), val = tensor([3, 2, 256])]; + tensor transpose_61 = transpose(perm = transpose_61_perm_0, x = var_3394)[name = string("transpose_156")]; + tensor x_83 = reshape(shape = var_3445, x = transpose_61)[name = string("x_83")]; + int32 var_3451 = const()[name = string("op_3451"), val = int32(-1)]; + fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; + tensor var_3453 = mul(x = x_83, y = const_52_promoted)[name = string("op_3453")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131 = concat(axis = var_3451, interleave = input_131_interleave_0, values = (x_83, var_3453))[name = string("input_131")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_3448_to_fp16 = const()[name = string("op_3448_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_3448_to_fp16, x = input_131)[name = string("normed_125_cast_fp16")]; + tensor var_3458_split_sizes_0 = const()[name = string("op_3458_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3458_axis_0 = const()[name = string("op_3458_axis_0"), val = int32(-1)]; + tensor var_3458_0, tensor var_3458_1 = split(axis = var_3458_axis_0, split_sizes = var_3458_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_3458")]; + tensor k_29 = mul(x = var_3458_0, y = layers_4_self_attn_k_norm_weight)[name = string("k_29")]; + tensor var_3465 = const()[name = 
string("op_3465"), val = tensor([1, 3, 2, 256])]; + tensor var_3466 = reshape(shape = var_3465, x = k_29)[name = string("op_3466")]; + tensor var_3471 = const()[name = string("op_3471"), val = tensor([0, 2, 1, 3])]; + fp16 var_3473_promoted = const()[name = string("op_3473_promoted"), val = fp16(0x1p+1)]; + tensor var_3428 = transpose(perm = var_3427, x = var_3422)[name = string("transpose_155")]; + tensor var_3474 = pow(x = var_3428, y = var_3473_promoted)[name = string("op_3474")]; + tensor var_3479_axes_0 = const()[name = string("op_3479_axes_0"), val = tensor([-1])]; + bool var_3479_keep_dims_0 = const()[name = string("op_3479_keep_dims_0"), val = bool(true)]; + tensor var_3479 = reduce_mean(axes = var_3479_axes_0, keep_dims = var_3479_keep_dims_0, x = var_3474)[name = string("op_3479")]; + fp16 var_3481_to_fp16 = const()[name = string("op_3481_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_9_cast_fp16 = add(x = var_3479, y = var_3481_to_fp16)[name = string("mean_sq_9_cast_fp16")]; + fp32 var_3483_epsilon_0 = const()[name = string("op_3483_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3483_cast_fp16 = rsqrt(epsilon = var_3483_epsilon_0, x = mean_sq_9_cast_fp16)[name = string("op_3483_cast_fp16")]; + tensor input_135_cast_fp16 = mul(x = var_3428, y = var_3483_cast_fp16)[name = string("input_135_cast_fp16")]; + tensor q_57 = transpose(perm = var_3471, x = var_3466)[name = string("transpose_154")]; + tensor var_3485_cast_fp16 = mul(x = q_57, y = cos_s)[name = string("op_3485_cast_fp16")]; + tensor var_3486_split_sizes_0 = const()[name = string("op_3486_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3486_axis_0 = const()[name = string("op_3486_axis_0"), val = int32(-1)]; + tensor var_3486_0, tensor var_3486_1 = split(axis = var_3486_axis_0, split_sizes = var_3486_split_sizes_0, x = q_57)[name = string("op_3486")]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor var_3488 = mul(x = 
var_3486_1, y = const_53_promoted)[name = string("op_3488")]; + int32 var_3490 = const()[name = string("op_3490"), val = int32(-1)]; + bool var_3491_interleave_0 = const()[name = string("op_3491_interleave_0"), val = bool(false)]; + tensor var_3491 = concat(axis = var_3490, interleave = var_3491_interleave_0, values = (var_3488, var_3486_0))[name = string("op_3491")]; + tensor var_3492_cast_fp16 = mul(x = var_3491, y = sin_s)[name = string("op_3492_cast_fp16")]; + tensor input_133_cast_fp16 = add(x = var_3485_cast_fp16, y = var_3492_cast_fp16)[name = string("input_133_cast_fp16")]; + tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; + fp16 const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_9_cast_fp16 = pad(constant_val = const_54_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_133_cast_fp16)[name = string("k_padded_9_cast_fp16")]; + tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; + fp16 const_55_to_fp16 = const()[name = string("const_55_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_9_cast_fp16 = pad(constant_val = const_55_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = input_135_cast_fp16)[name = string("v_padded_9_cast_fp16")]; + tensor slot_k_9_begin_0 = const()[name = string("slot_k_9_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor slot_k_9_end_0 = const()[name = string("slot_k_9_end_0"), val = tensor([5, 2, 512, 512])]; + tensor slot_k_9_end_mask_0 = const()[name = string("slot_k_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_9_cast_fp16 = slice_by_index(begin = slot_k_9_begin_0, end = slot_k_9_end_0, end_mask = 
slot_k_9_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("slot_k_9_cast_fp16")]; + tensor slot_v_9_begin_0 = const()[name = string("slot_v_9_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor slot_v_9_end_0 = const()[name = string("slot_v_9_end_0"), val = tensor([5, 2, 512, 512])]; + tensor slot_v_9_end_mask_0 = const()[name = string("slot_v_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_9_cast_fp16 = slice_by_index(begin = slot_v_9_begin_0, end = slot_v_9_end_0, end_mask = slot_v_9_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("slot_v_9_cast_fp16")]; + tensor var_3531_begin_0 = const()[name = string("op_3531_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_3531_end_0 = const()[name = string("op_3531_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3531_end_mask_0 = const()[name = string("op_3531_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3531_cast_fp16 = slice_by_index(begin = var_3531_begin_0, end = var_3531_end_0, end_mask = var_3531_end_mask_0, x = slot_k_9_cast_fp16)[name = string("op_3531_cast_fp16")]; + int32 var_3538 = const()[name = string("op_3538"), val = int32(2)]; + bool new_k_9_interleave_0 = const()[name = string("new_k_9_interleave_0"), val = bool(false)]; + tensor new_k_9_cast_fp16 = concat(axis = var_3538, interleave = new_k_9_interleave_0, values = (var_3531_cast_fp16, k_padded_9_cast_fp16))[name = string("new_k_9_cast_fp16")]; + tensor var_3554_begin_0 = const()[name = string("op_3554_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_3554_end_0 = const()[name = string("op_3554_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3554_end_mask_0 = const()[name = string("op_3554_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3554_cast_fp16 = slice_by_index(begin = var_3554_begin_0, end = var_3554_end_0, end_mask = var_3554_end_mask_0, x = slot_v_9_cast_fp16)[name = string("op_3554_cast_fp16")]; + int32 var_3561 = const()[name = 
string("op_3561"), val = int32(2)]; + bool new_v_9_interleave_0 = const()[name = string("new_v_9_interleave_0"), val = bool(false)]; + tensor new_v_9_cast_fp16 = concat(axis = var_3561, interleave = new_v_9_interleave_0, values = (var_3554_cast_fp16, v_padded_9_cast_fp16))[name = string("new_v_9_cast_fp16")]; + tensor var_3567_begin_0 = const()[name = string("op_3567_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3567_end_0 = const()[name = string("op_3567_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_3567_end_mask_0 = const()[name = string("op_3567_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3567_cast_fp16 = slice_by_index(begin = var_3567_begin_0, end = var_3567_end_0, end_mask = var_3567_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("op_3567_cast_fp16")]; + tensor var_3572_begin_0 = const()[name = string("op_3572_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_3572_end_0 = const()[name = string("op_3572_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_3572_end_mask_0 = const()[name = string("op_3572_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3572_cast_fp16 = slice_by_index(begin = var_3572_begin_0, end = var_3572_end_0, end_mask = var_3572_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("op_3572_cast_fp16")]; + int32 var_3574 = const()[name = string("op_3574"), val = int32(0)]; + bool K_sliding_out_9_interleave_0 = const()[name = string("K_sliding_out_9_interleave_0"), val = bool(false)]; + tensor K_sliding_out_9_cast_fp16 = concat(axis = var_3574, interleave = K_sliding_out_9_interleave_0, values = (var_3567_cast_fp16, new_k_9_cast_fp16, var_3572_cast_fp16))[name = string("K_sliding_out_9_cast_fp16")]; + tensor var_3580_begin_0 = const()[name = string("op_3580_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3580_end_0 = const()[name = string("op_3580_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_3580_end_mask_0 = const()[name = 
string("op_3580_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3580_cast_fp16 = slice_by_index(begin = var_3580_begin_0, end = var_3580_end_0, end_mask = var_3580_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("op_3580_cast_fp16")]; + tensor var_3585_begin_0 = const()[name = string("op_3585_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_3585_end_0 = const()[name = string("op_3585_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_3585_end_mask_0 = const()[name = string("op_3585_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("op_3585_cast_fp16")]; + int32 var_3587 = const()[name = string("op_3587"), val = int32(0)]; + bool V_sliding_out_9_interleave_0 = const()[name = string("V_sliding_out_9_interleave_0"), val = bool(false)]; + tensor V_sliding_out_9_cast_fp16 = concat(axis = var_3587, interleave = V_sliding_out_9_interleave_0, values = (var_3580_cast_fp16, new_v_9_cast_fp16, var_3585_cast_fp16))[name = string("V_sliding_out_9_cast_fp16")]; + tensor var_3593_begin_0 = const()[name = string("op_3593_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3593_end_0 = const()[name = string("op_3593_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3593_end_mask_0 = const()[name = string("op_3593_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3593_cast_fp16 = slice_by_index(begin = var_3593_begin_0, end = var_3593_end_0, end_mask = var_3593_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("op_3593_cast_fp16")]; + tensor K_for_attn_9_begin_0 = const()[name = string("K_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_9_end_0 = const()[name = string("K_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_9_end_mask_0 = const()[name = string("K_for_attn_9_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor K_for_attn_9_cast_fp16 = slice_by_index(begin = K_for_attn_9_begin_0, end = K_for_attn_9_end_0, end_mask = K_for_attn_9_end_mask_0, x = var_3593_cast_fp16)[name = string("K_for_attn_9_cast_fp16")]; + tensor var_3603_begin_0 = const()[name = string("op_3603_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3603_end_0 = const()[name = string("op_3603_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3603_end_mask_0 = const()[name = string("op_3603_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3603_cast_fp16 = slice_by_index(begin = var_3603_begin_0, end = var_3603_end_0, end_mask = var_3603_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("op_3603_cast_fp16")]; + tensor V_for_attn_9_begin_0 = const()[name = string("V_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_9_end_0 = const()[name = string("V_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_9_end_mask_0 = const()[name = string("V_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_9_cast_fp16 = slice_by_index(begin = V_for_attn_9_begin_0, end = V_for_attn_9_end_0, end_mask = V_for_attn_9_end_mask_0, x = var_3603_cast_fp16)[name = string("V_for_attn_9_cast_fp16")]; + tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_for_attn_9_cast_fp16)[name = string("transpose_153")]; + tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; + tensor transpose_17_perm_0 = 
const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_152")]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_for_attn_9_cast_fp16)[name = string("transpose_151")]; + tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_150")]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; + tensor V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + 
tensor transpose_62_cast_fp16 = transpose(perm = transpose_62_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_149")]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_59_cast_fp16, y = transpose_62_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_87_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_87_cast_fp16)[name = string("reduce_max_4")]; + tensor var_3638 = sub(x = x_87_cast_fp16, y = reduce_max_4)[name = string("op_3638")]; + tensor var_3644 = exp(x = var_3638)[name = string("op_3644")]; + tensor var_3654_axes_0 = const()[name = string("op_3654_axes_0"), val = tensor([-1])]; + bool var_3654_keep_dims_0 = const()[name = string("op_3654_keep_dims_0"), val = bool(true)]; + tensor var_3654 = reduce_sum(axes = var_3654_axes_0, keep_dims = var_3654_keep_dims_0, x = var_3644)[name = string("op_3654")]; + tensor var_3660_cast_fp16 = real_div(x = var_3644, y = var_3654)[name = string("op_3660_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_148")]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_3660_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor 
var_3671 = const()[name = string("op_3671"), val = tensor([0, 2, 1, 3])]; + tensor var_3678 = const()[name = string("op_3678"), val = tensor([1, 3, -1])]; + tensor var_3672_cast_fp16 = transpose(perm = var_3671, x = attn_output_25_cast_fp16)[name = string("transpose_147")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_3678, x = var_3672_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_3683 = const()[name = string("op_3683"), val = tensor([0, 2, 1])]; + string var_3699_pad_type_0 = const()[name = string("op_3699_pad_type_0"), val = string("valid")]; + int32 var_3699_groups_0 = const()[name = string("op_3699_groups_0"), val = int32(1)]; + tensor var_3699_strides_0 = const()[name = string("op_3699_strides_0"), val = tensor([1])]; + tensor var_3699_pad_0 = const()[name = string("op_3699_pad_0"), val = tensor([0, 0])]; + tensor var_3699_dilations_0 = const()[name = string("op_3699_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556930880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559552384))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3684_cast_fp16 = transpose(perm = var_3683, x = attn_output_27_cast_fp16)[name = string("transpose_146")]; + tensor var_3699_cast_fp16 = conv(dilations = var_3699_dilations_0, groups = var_3699_groups_0, pad = var_3699_pad_0, pad_type = var_3699_pad_type_0, strides = var_3699_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3684_cast_fp16)[name = string("op_3699_cast_fp16")]; + tensor var_3703 = const()[name = string("op_3703"), val = tensor([0, 2, 1])]; + int32 var_3709 = const()[name = string("op_3709"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor 
x_91_cast_fp16 = transpose(perm = var_3703, x = var_3699_cast_fp16)[name = string("transpose_145")]; + tensor var_3711_cast_fp16 = mul(x = x_91_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3711_cast_fp16")]; + bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; + tensor input_139_cast_fp16 = concat(axis = var_3709, interleave = input_139_interleave_0, values = (x_91_cast_fp16, var_3711_cast_fp16))[name = string("input_139_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3706_to_fp16, x = input_139_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_3716_split_sizes_0 = const()[name = string("op_3716_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3716_axis_0 = const()[name = string("op_3716_axis_0"), val = int32(-1)]; + tensor var_3716_cast_fp16_0, tensor var_3716_cast_fp16_1 = split(axis = var_3716_axis_0, split_sizes = var_3716_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3716_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559555008)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_3716_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_93_cast_fp16")]; + int32 var_3725 = const()[name = string("op_3725"), val = int32(-1)]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3727_cast_fp16 = mul(x = 
x_93_cast_fp16, y = const_57_promoted_to_fp16)[name = string("op_3727_cast_fp16")]; + bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; + tensor input_141_cast_fp16 = concat(axis = var_3725, interleave = input_141_interleave_0, values = (x_93_cast_fp16, var_3727_cast_fp16))[name = string("input_141_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_3722_to_fp16 = const()[name = string("op_3722_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3722_to_fp16, x = input_141_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_3732_split_sizes_0 = const()[name = string("op_3732_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3732_axis_0 = const()[name = string("op_3732_axis_0"), val = int32(-1)]; + tensor var_3732_cast_fp16_0, tensor var_3732_cast_fp16_1 = split(axis = var_3732_axis_0, split_sizes = var_3732_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3732_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559560192)))]; + tensor h_27_cast_fp16 = mul(x = var_3732_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_3743 = const()[name = string("op_3743"), val = tensor([0, 2, 1])]; + tensor input_143_axes_0 = const()[name = string("input_143_axes_0"), val = tensor([2])]; + tensor var_3744 = transpose(perm = var_3743, x = h_27_cast_fp16)[name = string("transpose_144")]; + tensor input_143 = expand_dims(axes = input_143_axes_0, x = var_3744)[name = string("input_143")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor 
gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_143)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_143)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_145 = mul(x = gate_19, y = up_9)[name = string("input_145")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 
mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_145)[name = string("mlp_out_9")]; + tensor var_3784_axes_0 = const()[name = string("op_3784_axes_0"), val = tensor([2])]; + tensor var_3784 = squeeze(axes = var_3784_axes_0, x = mlp_out_9)[name = string("op_3784")]; + tensor var_3788 = const()[name = string("op_3788"), val = tensor([0, 2, 1])]; + int32 var_3794 = const()[name = string("op_3794"), val = int32(-1)]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor x_95 = transpose(perm = var_3788, x = var_3784)[name = string("transpose_143")]; + tensor var_3796 = mul(x = x_95, y = const_58_promoted)[name = string("op_3796")]; + bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; + tensor input_147 = concat(axis = var_3794, interleave = input_147_interleave_0, values = (x_95, var_3796))[name = string("input_147")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_3791_to_fp16 = const()[name = string("op_3791_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3791_to_fp16, x = input_147)[name = string("normed_137_cast_fp16")]; + tensor var_3801_split_sizes_0 = const()[name = string("op_3801_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3801_axis_0 = const()[name = string("op_3801_axis_0"), val = int32(-1)]; + tensor var_3801_0, tensor var_3801_1 = split(axis = var_3801_axis_0, split_sizes = var_3801_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3801")]; + tensor hidden_states_43 = mul(x = var_3801_0, y = layers_4_post_feedforward_layernorm_weight)[name = 
string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_93_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 1024])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 3, 1280])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_3829 = const()[name = string("op_3829"), val = tensor([0, 2, 1])]; + tensor input_149_axes_0 = const()[name = string("input_149_axes_0"), val = tensor([2])]; + tensor var_3830 = transpose(perm = var_3829, x = hidden_states_45_cast_fp16)[name = string("transpose_142")]; + tensor input_149 = expand_dims(axes = input_149_axes_0, x = var_3830)[name = string("input_149")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_149)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_3849 = const()[name = string("op_3849"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_3850_cast_fp16 = transpose(perm = var_3849, x = per_layer_slice_9_cast_fp16)[name = string("transpose_141")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_3850_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_151_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_151_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559565376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559893120))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_151_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_3866_axes_0 = const()[name = string("op_3866_axes_0"), 
val = tensor([2])]; + tensor var_3866_cast_fp16 = squeeze(axes = var_3866_axes_0, x = gated_29_cast_fp16)[name = string("op_3866_cast_fp16")]; + tensor var_3870 = const()[name = string("op_3870"), val = tensor([0, 2, 1])]; + int32 var_3876 = const()[name = string("op_3876"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_97_cast_fp16 = transpose(perm = var_3870, x = var_3866_cast_fp16)[name = string("transpose_140")]; + tensor var_3878_cast_fp16 = mul(x = x_97_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3878_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_3876, interleave = input_153_interleave_0, values = (x_97_cast_fp16, var_3878_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_3873_to_fp16 = const()[name = string("op_3873_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3873_to_fp16, x = input_153_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_3883_split_sizes_0 = const()[name = string("op_3883_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3883_axis_0 = const()[name = string("op_3883_axis_0"), val = int32(-1)]; + tensor var_3883_cast_fp16_0, tensor var_3883_cast_fp16_1 = split(axis = var_3883_axis_0, split_sizes = var_3883_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3883_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559895744)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_3883_cast_fp16_0, y = 
layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = tensor([0x1.2cp-1])]; + tensor x_99_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_60_promoted_to_fp16)[name = string("x_99_cast_fp16")]; + int32 var_3898 = const()[name = string("op_3898"), val = int32(-1)]; + fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3900_cast_fp16 = mul(x = x_99_cast_fp16, y = const_61_promoted_to_fp16)[name = string("op_3900_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_3898, interleave = input_155_interleave_0, values = (x_99_cast_fp16, var_3900_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_3895_to_fp16 = const()[name = string("op_3895_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_3895_to_fp16, x = input_155_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_3905_split_sizes_0 = const()[name = string("op_3905_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3905_axis_0 = const()[name = string("op_3905_axis_0"), val = int32(-1)]; + tensor var_3905_cast_fp16_0, tensor var_3905_cast_fp16_1 = split(axis = var_3905_axis_0, split_sizes = var_3905_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_3905_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(559900928)))]; + tensor h_31_cast_fp16 = mul(x = var_3905_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_3911 = const()[name = string("op_3911"), val = tensor([0, 2, 1])]; + tensor var_3914_axes_0 = const()[name = string("op_3914_axes_0"), val = tensor([2])]; + tensor var_3912_cast_fp16 = transpose(perm = var_3911, x = h_31_cast_fp16)[name = string("transpose_139")]; + tensor var_3914_cast_fp16 = expand_dims(axes = var_3914_axes_0, x = var_3912_cast_fp16)[name = string("op_3914_cast_fp16")]; + string q_61_pad_type_0 = const()[name = string("q_61_pad_type_0"), val = string("valid")]; + tensor q_61_strides_0 = const()[name = string("q_61_strides_0"), val = tensor([1, 1])]; + tensor q_61_pad_0 = const()[name = string("q_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_61_dilations_0 = const()[name = string("q_61_dilations_0"), val = tensor([1, 1])]; + int32 q_61_groups_0 = const()[name = string("q_61_groups_0"), val = int32(1)]; + tensor q_61 = conv(dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_3914_cast_fp16)[name = string("q_61")]; + tensor var_3935 = const()[name = string("op_3935"), val = tensor([1, 8, 512, 3])]; + tensor var_3936 = reshape(shape = var_3935, x = q_61)[name = string("op_3936")]; + tensor transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3959 = const()[name = string("op_3959"), val = tensor([3, 8, 512])]; + tensor transpose_63 = transpose(perm = transpose_63_perm_0, x = var_3936)[name = string("transpose_138")]; + tensor x_101 = reshape(shape = var_3959, x = transpose_63)[name = string("x_101")]; + int32 var_3965 = const()[name = string("op_3965"), val = int32(-1)]; + fp16 const_62_promoted = const()[name = 
string("const_62_promoted"), val = fp16(-0x1p+0)]; + tensor var_3967 = mul(x = x_101, y = const_62_promoted)[name = string("op_3967")]; + bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; + tensor input_159 = concat(axis = var_3965, interleave = input_159_interleave_0, values = (x_101, var_3967))[name = string("input_159")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_3962_to_fp16 = const()[name = string("op_3962_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_3962_to_fp16, x = input_159)[name = string("normed_149_cast_fp16")]; + tensor var_3972_split_sizes_0 = const()[name = string("op_3972_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3972_axis_0 = const()[name = string("op_3972_axis_0"), val = int32(-1)]; + tensor var_3972_0, tensor var_3972_1 = split(axis = var_3972_axis_0, split_sizes = var_3972_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_3972")]; + tensor q_65 = mul(x = var_3972_0, y = layers_5_self_attn_q_norm_weight)[name = string("q_65")]; + tensor var_3979 = const()[name = string("op_3979"), val = tensor([1, 3, 8, 512])]; + tensor var_3980 = reshape(shape = var_3979, x = q_65)[name = string("op_3980")]; + tensor var_3985 = const()[name = string("op_3985"), val = tensor([0, 2, 1, 3])]; + tensor q_67 = transpose(perm = var_3985, x = var_3980)[name = string("transpose_137")]; + tensor var_3987_cast_fp16 = mul(x = q_67, y = cos_f)[name = string("op_3987_cast_fp16")]; + tensor var_3988_split_sizes_0 = const()[name = string("op_3988_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3988_axis_0 = const()[name = string("op_3988_axis_0"), val = int32(-1)]; + tensor var_3988_0, tensor var_3988_1 = split(axis = var_3988_axis_0, split_sizes = var_3988_split_sizes_0, x = q_67)[name = string("op_3988")]; + fp16 const_63_promoted = const()[name = 
string("const_63_promoted"), val = fp16(-0x1p+0)]; + tensor var_3990 = mul(x = var_3988_1, y = const_63_promoted)[name = string("op_3990")]; + int32 var_3992 = const()[name = string("op_3992"), val = int32(-1)]; + bool var_3993_interleave_0 = const()[name = string("op_3993_interleave_0"), val = bool(false)]; + tensor var_3993 = concat(axis = var_3992, interleave = var_3993_interleave_0, values = (var_3990, var_3988_0))[name = string("op_3993")]; + tensor var_3994_cast_fp16 = mul(x = var_3993, y = sin_f)[name = string("op_3994_cast_fp16")]; + tensor q_71_cast_fp16 = add(x = var_3987_cast_fp16, y = var_3994_cast_fp16)[name = string("q_71_cast_fp16")]; + string k_31_pad_type_0 = const()[name = string("k_31_pad_type_0"), val = string("valid")]; + tensor k_31_strides_0 = const()[name = string("k_31_strides_0"), val = tensor([1, 1])]; + tensor k_31_pad_0 = const()[name = string("k_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_31_dilations_0 = const()[name = string("k_31_dilations_0"), val = tensor([1, 1])]; + int32 k_31_groups_0 = const()[name = string("k_31_groups_0"), val = int32(1)]; + tensor k_31 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = layers_5_self_attn_k_proj_weight_palettized, x = var_3914_cast_fp16)[name = string("k_31")]; + tensor var_4012 = const()[name = string("op_4012"), val = tensor([1, 2, 512, 3])]; + tensor var_4013 = reshape(shape = var_4012, x = k_31)[name = string("op_4013")]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_11_pad_type_0 = const()[name = string("v_11_pad_type_0"), val = string("valid")]; + tensor v_11_strides_0 = const()[name = string("v_11_strides_0"), val = tensor([1, 1])]; + tensor v_11_pad_0 = const()[name = string("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_11_dilations_0 = const()[name = string("v_11_dilations_0"), val = tensor([1, 1])]; + int32 
v_11_groups_0 = const()[name = string("v_11_groups_0"), val = int32(1)]; + tensor v_11 = conv(dilations = v_11_dilations_0, groups = v_11_groups_0, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = v_11_strides_0, weight = layers_5_self_attn_v_proj_weight_palettized, x = var_3914_cast_fp16)[name = string("v_11")]; + tensor var_4040 = const()[name = string("op_4040"), val = tensor([1, 2, 512, 3])]; + tensor var_4041 = reshape(shape = var_4040, x = v_11)[name = string("op_4041")]; + tensor var_4046 = const()[name = string("op_4046"), val = tensor([0, 1, 3, 2])]; + tensor var_4064 = const()[name = string("op_4064"), val = tensor([3, 2, 512])]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = var_4013)[name = string("transpose_136")]; + tensor x_103 = reshape(shape = var_4064, x = transpose_64)[name = string("x_103")]; + int32 var_4070 = const()[name = string("op_4070"), val = int32(-1)]; + fp16 const_64_promoted = const()[name = string("const_64_promoted"), val = fp16(-0x1p+0)]; + tensor var_4072 = mul(x = x_103, y = const_64_promoted)[name = string("op_4072")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_4070, interleave = input_161_interleave_0, values = (x_103, var_4072))[name = string("input_161")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_4067_to_fp16 = const()[name = string("op_4067_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_4067_to_fp16, x = input_161)[name = string("normed_153_cast_fp16")]; + tensor var_4077_split_sizes_0 = const()[name = string("op_4077_split_sizes_0"), val = tensor([512, 512])]; + int32 var_4077_axis_0 = const()[name = string("op_4077_axis_0"), val = int32(-1)]; + tensor var_4077_0, tensor var_4077_1 = split(axis = var_4077_axis_0, split_sizes = var_4077_split_sizes_0, x = 
normed_153_cast_fp16)[name = string("op_4077")]; + tensor k_35 = mul(x = var_4077_0, y = layers_5_self_attn_k_norm_weight)[name = string("k_35")]; + tensor var_4084 = const()[name = string("op_4084"), val = tensor([1, 3, 2, 512])]; + tensor var_4085 = reshape(shape = var_4084, x = k_35)[name = string("op_4085")]; + tensor var_4090 = const()[name = string("op_4090"), val = tensor([0, 2, 1, 3])]; + fp16 var_4092_promoted = const()[name = string("op_4092_promoted"), val = fp16(0x1p+1)]; + tensor var_4047 = transpose(perm = var_4046, x = var_4041)[name = string("transpose_135")]; + tensor var_4093 = pow(x = var_4047, y = var_4092_promoted)[name = string("op_4093")]; + tensor var_4098_axes_0 = const()[name = string("op_4098_axes_0"), val = tensor([-1])]; + bool var_4098_keep_dims_0 = const()[name = string("op_4098_keep_dims_0"), val = bool(true)]; + tensor var_4098 = reduce_mean(axes = var_4098_axes_0, keep_dims = var_4098_keep_dims_0, x = var_4093)[name = string("op_4098")]; + fp16 var_4100_to_fp16 = const()[name = string("op_4100_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_11_cast_fp16 = add(x = var_4098, y = var_4100_to_fp16)[name = string("mean_sq_11_cast_fp16")]; + fp32 var_4102_epsilon_0 = const()[name = string("op_4102_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4102_cast_fp16 = rsqrt(epsilon = var_4102_epsilon_0, x = mean_sq_11_cast_fp16)[name = string("op_4102_cast_fp16")]; + tensor v_13_cast_fp16 = mul(x = var_4047, y = var_4102_cast_fp16)[name = string("v_13_cast_fp16")]; + tensor q_69 = transpose(perm = var_4090, x = var_4085)[name = string("transpose_134")]; + tensor var_4104_cast_fp16 = mul(x = q_69, y = cos_f)[name = string("op_4104_cast_fp16")]; + tensor var_4105_split_sizes_0 = const()[name = string("op_4105_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4105_axis_0 = const()[name = string("op_4105_axis_0"), val = int32(-1)]; + tensor var_4105_0, tensor var_4105_1 = split(axis = var_4105_axis_0, split_sizes = 
var_4105_split_sizes_0, x = q_69)[name = string("op_4105")]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_4107 = mul(x = var_4105_1, y = const_65_promoted)[name = string("op_4107")]; + int32 var_4109 = const()[name = string("op_4109"), val = int32(-1)]; + bool var_4110_interleave_0 = const()[name = string("op_4110_interleave_0"), val = bool(false)]; + tensor var_4110 = concat(axis = var_4109, interleave = var_4110_interleave_0, values = (var_4107, var_4105_0))[name = string("op_4110")]; + tensor var_4111_cast_fp16 = mul(x = var_4110, y = sin_f)[name = string("op_4111_cast_fp16")]; + tensor k_37_cast_fp16 = add(x = var_4104_cast_fp16, y = var_4111_cast_fp16)[name = string("k_37_cast_fp16")]; + tensor var_4120_reps_0 = const()[name = string("op_4120_reps_0"), val = tensor([1, 2, 1, 1])]; + tensor var_4120_cast_fp16 = tile(reps = var_4120_reps_0, x = update_indicator)[name = string("op_4120_cast_fp16")]; + bool k_scattered_1_transpose_x_0 = const()[name = string("k_scattered_1_transpose_x_0"), val = bool(false)]; + bool k_scattered_1_transpose_y_0 = const()[name = string("k_scattered_1_transpose_y_0"), val = bool(false)]; + tensor k_scattered_1_cast_fp16 = matmul(transpose_x = k_scattered_1_transpose_x_0, transpose_y = k_scattered_1_transpose_y_0, x = var_4120_cast_fp16, y = k_37_cast_fp16)[name = string("k_scattered_1_cast_fp16")]; + bool v_scattered_1_transpose_x_0 = const()[name = string("v_scattered_1_transpose_x_0"), val = bool(false)]; + bool v_scattered_1_transpose_y_0 = const()[name = string("v_scattered_1_transpose_y_0"), val = bool(false)]; + tensor v_scattered_1_cast_fp16 = matmul(transpose_x = v_scattered_1_transpose_x_0, transpose_y = v_scattered_1_transpose_y_0, x = var_4120_cast_fp16, y = v_13_cast_fp16)[name = string("v_scattered_1_cast_fp16")]; + tensor var_4134_axes_0 = const()[name = string("op_4134_axes_0"), val = tensor([-1])]; + bool var_4134_keep_dims_0 = const()[name = 
string("op_4134_keep_dims_0"), val = bool(true)]; + tensor var_4134_cast_fp16 = reduce_sum(axes = var_4134_axes_0, keep_dims = var_4134_keep_dims_0, x = update_indicator)[name = string("op_4134_cast_fp16")]; + tensor slot_k_11_begin_0 = const()[name = string("slot_k_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_k_11_end_0 = const()[name = string("slot_k_11_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_k_11_end_mask_0 = const()[name = string("slot_k_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_11_cast_fp16 = slice_by_index(begin = slot_k_11_begin_0, end = slot_k_11_end_0, end_mask = slot_k_11_end_mask_0, x = K_full_in)[name = string("slot_k_11_cast_fp16")]; + tensor slot_v_11_begin_0 = const()[name = string("slot_v_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_v_11_end_0 = const()[name = string("slot_v_11_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_v_11_end_mask_0 = const()[name = string("slot_v_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_11_cast_fp16 = slice_by_index(begin = slot_v_11_begin_0, end = slot_v_11_end_0, end_mask = slot_v_11_end_mask_0, x = V_full_in)[name = string("slot_v_11_cast_fp16")]; + fp16 var_4145_promoted_to_fp16 = const()[name = string("op_4145_promoted_to_fp16"), val = fp16(0x1p+0)]; + tensor var_4147_cast_fp16 = sub(x = var_4145_promoted_to_fp16, y = var_4134_cast_fp16)[name = string("op_4147_cast_fp16")]; + tensor var_4148_cast_fp16 = mul(x = slot_k_11_cast_fp16, y = var_4147_cast_fp16)[name = string("op_4148_cast_fp16")]; + tensor new_k_11_cast_fp16 = add(x = var_4148_cast_fp16, y = k_scattered_1_cast_fp16)[name = string("new_k_11_cast_fp16")]; + tensor var_4154_cast_fp16 = mul(x = slot_v_11_cast_fp16, y = var_4147_cast_fp16)[name = string("op_4154_cast_fp16")]; + tensor new_v_11_cast_fp16 = add(x = var_4154_cast_fp16, y = v_scattered_1_cast_fp16)[name = string("new_v_11_cast_fp16")]; + tensor var_4166_begin_0 = const()[name = 
string("op_4166_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_4166_end_0 = const()[name = string("op_4166_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_4166_end_mask_0 = const()[name = string("op_4166_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4166_cast_fp16 = slice_by_index(begin = var_4166_begin_0, end = var_4166_end_0, end_mask = var_4166_end_mask_0, x = K_full_in)[name = string("op_4166_cast_fp16")]; + int32 var_4168 = const()[name = string("op_4168"), val = int32(0)]; + bool K_full_out_1_interleave_0 = const()[name = string("K_full_out_1_interleave_0"), val = bool(false)]; + tensor K_full_out_1_cast_fp16 = concat(axis = var_4168, interleave = K_full_out_1_interleave_0, values = (new_k_11_cast_fp16, var_4166_cast_fp16))[name = string("K_full_out_1_cast_fp16")]; + tensor var_4179_begin_0 = const()[name = string("op_4179_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_4179_end_0 = const()[name = string("op_4179_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_4179_end_mask_0 = const()[name = string("op_4179_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4179_cast_fp16 = slice_by_index(begin = var_4179_begin_0, end = var_4179_end_0, end_mask = var_4179_end_mask_0, x = V_full_in)[name = string("op_4179_cast_fp16")]; + int32 var_4181 = const()[name = string("op_4181"), val = int32(0)]; + bool V_full_out_1_interleave_0 = const()[name = string("V_full_out_1_interleave_0"), val = bool(false)]; + tensor V_full_out_1_cast_fp16 = concat(axis = var_4181, interleave = V_full_out_1_interleave_0, values = (new_v_11_cast_fp16, var_4179_cast_fp16))[name = string("V_full_out_1_cast_fp16")]; + tensor var_4187_begin_0 = const()[name = string("op_4187_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4187_end_0 = const()[name = string("op_4187_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_4187_end_mask_0 = const()[name = string("op_4187_end_mask_0"), val = tensor([false, true, true, true])]; + 
tensor var_4187_cast_fp16 = slice_by_index(begin = var_4187_begin_0, end = var_4187_end_0, end_mask = var_4187_end_mask_0, x = K_full_out_1_cast_fp16)[name = string("op_4187_cast_fp16")]; + tensor var_4197_begin_0 = const()[name = string("op_4197_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4197_end_0 = const()[name = string("op_4197_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_4197_end_mask_0 = const()[name = string("op_4197_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4197_cast_fp16 = slice_by_index(begin = var_4197_begin_0, end = var_4197_end_0, end_mask = var_4197_end_mask_0, x = V_full_out_1_cast_fp16)[name = string("op_4197_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = var_4187_cast_fp16)[name = string("transpose_133")]; + tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_22, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_132")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_23, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = 
string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = var_4197_cast_fp16)[name = string("transpose_131")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_24, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_130")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_25, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_65_cast_fp16 = transpose(perm = transpose_65_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_129")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_71_cast_fp16, y = transpose_65_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_107_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + 
bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_107_cast_fp16)[name = string("reduce_max_5")]; + tensor var_4232 = sub(x = x_107_cast_fp16, y = reduce_max_5)[name = string("op_4232")]; + tensor var_4238 = exp(x = var_4232)[name = string("op_4238")]; + tensor var_4248_axes_0 = const()[name = string("op_4248_axes_0"), val = tensor([-1])]; + bool var_4248_keep_dims_0 = const()[name = string("op_4248_keep_dims_0"), val = bool(true)]; + tensor var_4248 = reduce_sum(axes = var_4248_axes_0, keep_dims = var_4248_keep_dims_0, x = var_4238)[name = string("op_4248")]; + tensor var_4254_cast_fp16 = real_div(x = var_4238, y = var_4248)[name = string("op_4254_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_128")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_4254_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_4265 = const()[name = string("op_4265"), val = tensor([0, 2, 1, 3])]; + tensor var_4272 = const()[name = string("op_4272"), val = tensor([1, 3, -1])]; + tensor var_4266_cast_fp16 = transpose(perm = var_4265, x = attn_output_31_cast_fp16)[name = string("transpose_127")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_4272, x = var_4266_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_4277 = const()[name = string("op_4277"), val = tensor([0, 2, 1])]; + string var_4293_pad_type_0 = const()[name = string("op_4293_pad_type_0"), val = 
string("valid")]; + int32 var_4293_groups_0 = const()[name = string("op_4293_groups_0"), val = int32(1)]; + tensor var_4293_strides_0 = const()[name = string("op_4293_strides_0"), val = tensor([1])]; + tensor var_4293_pad_0 = const()[name = string("op_4293_pad_0"), val = tensor([0, 0])]; + tensor var_4293_dilations_0 = const()[name = string("op_4293_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559906112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565149056))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4278_cast_fp16 = transpose(perm = var_4277, x = attn_output_33_cast_fp16)[name = string("transpose_126")]; + tensor var_4293_cast_fp16 = conv(dilations = var_4293_dilations_0, groups = var_4293_groups_0, pad = var_4293_pad_0, pad_type = var_4293_pad_type_0, strides = var_4293_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4278_cast_fp16)[name = string("op_4293_cast_fp16")]; + tensor var_4297 = const()[name = string("op_4297"), val = tensor([0, 2, 1])]; + int32 var_4303 = const()[name = string("op_4303"), val = int32(-1)]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_111_cast_fp16 = transpose(perm = var_4297, x = var_4293_cast_fp16)[name = string("transpose_125")]; + tensor var_4305_cast_fp16 = mul(x = x_111_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_4305_cast_fp16")]; + bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; + tensor input_165_cast_fp16 = concat(axis = var_4303, interleave = input_165_interleave_0, values = (x_111_cast_fp16, var_4305_cast_fp16))[name = string("input_165_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = 
string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_4300_to_fp16 = const()[name = string("op_4300_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_4300_to_fp16, x = input_165_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_4310_split_sizes_0 = const()[name = string("op_4310_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4310_axis_0 = const()[name = string("op_4310_axis_0"), val = int32(-1)]; + tensor var_4310_cast_fp16_0, tensor var_4310_cast_fp16_1 = split(axis = var_4310_axis_0, split_sizes = var_4310_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_4310_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565151680)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_4310_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_113_cast_fp16 = add(x = x_99_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_113_cast_fp16")]; + int32 var_4319 = const()[name = string("op_4319"), val = int32(-1)]; + fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4321_cast_fp16 = mul(x = x_113_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_4321_cast_fp16")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_4319, interleave = input_167_interleave_0, values = (x_113_cast_fp16, var_4321_cast_fp16))[name = string("input_167_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_4316_to_fp16 = const()[name = string("op_4316_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_4316_to_fp16, x = input_167_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor var_4326_split_sizes_0 = const()[name = string("op_4326_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4326_axis_0 = const()[name = string("op_4326_axis_0"), val = int32(-1)]; + tensor var_4326_cast_fp16_0, tensor var_4326_cast_fp16_1 = split(axis = var_4326_axis_0, split_sizes = var_4326_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_4326_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565156864)))]; + tensor h_33_cast_fp16 = mul(x = var_4326_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_4337 = const()[name = string("op_4337"), val = tensor([0, 2, 1])]; + tensor input_169_axes_0 = const()[name = string("input_169_axes_0"), val = tensor([2])]; + tensor var_4338 = transpose(perm = var_4337, x = h_33_cast_fp16)[name = string("transpose_124")]; + tensor input_169 = expand_dims(axes = input_169_axes_0, x = var_4338)[name = string("input_169")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight 
= layers_5_mlp_gate_proj_weight_palettized, x = input_169)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_169)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_171 = mul(x = gate_23, y = up_11)[name = string("input_171")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_171)[name = string("mlp_out_11")]; + tensor var_4378_axes_0 = const()[name = string("op_4378_axes_0"), val = tensor([2])]; + tensor var_4378 = squeeze(axes = var_4378_axes_0, x = mlp_out_11)[name 
= string("op_4378")]; + tensor var_4382 = const()[name = string("op_4382"), val = tensor([0, 2, 1])]; + int32 var_4388 = const()[name = string("op_4388"), val = int32(-1)]; + fp16 const_68_promoted = const()[name = string("const_68_promoted"), val = fp16(-0x1p+0)]; + tensor x_115 = transpose(perm = var_4382, x = var_4378)[name = string("transpose_123")]; + tensor var_4390 = mul(x = x_115, y = const_68_promoted)[name = string("op_4390")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_4388, interleave = input_173_interleave_0, values = (x_115, var_4390))[name = string("input_173")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_4385_to_fp16 = const()[name = string("op_4385_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_4385_to_fp16, x = input_173)[name = string("normed_165_cast_fp16")]; + tensor var_4395_split_sizes_0 = const()[name = string("op_4395_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4395_axis_0 = const()[name = string("op_4395_axis_0"), val = int32(-1)]; + tensor var_4395_0, tensor var_4395_1 = split(axis = var_4395_axis_0, split_sizes = var_4395_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_4395")]; + tensor hidden_states_53 = mul(x = var_4395_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_113_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 1280])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 3, 1536])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, 
false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_4423 = const()[name = string("op_4423"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_4424 = transpose(perm = var_4423, x = hidden_states_55_cast_fp16)[name = string("transpose_122")]; + tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_4424)[name = string("input_175")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_175)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_4443 = const()[name = string("op_4443"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_4444_cast_fp16 = transpose(perm = var_4443, x = per_layer_slice_11_cast_fp16)[name = string("transpose_121")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = 
per_layer_slice_conv_11_axes_0, x = var_4444_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_177_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_177_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565162048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565489792))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_4460_axes_0 = const()[name = string("op_4460_axes_0"), val = tensor([2])]; + tensor var_4460_cast_fp16 = squeeze(axes = var_4460_axes_0, x = gated_35_cast_fp16)[name = string("op_4460_cast_fp16")]; + tensor var_4464 = const()[name = string("op_4464"), val = tensor([0, 2, 1])]; + int32 var_4470 = const()[name = string("op_4470"), val = int32(-1)]; + fp16 const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_117_cast_fp16 = transpose(perm = var_4464, x = var_4460_cast_fp16)[name = 
string("transpose_120")]; + tensor var_4472_cast_fp16 = mul(x = x_117_cast_fp16, y = const_69_promoted_to_fp16)[name = string("op_4472_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_4470, interleave = input_179_interleave_0, values = (x_117_cast_fp16, var_4472_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_4467_to_fp16 = const()[name = string("op_4467_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_4467_to_fp16, x = input_179_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_4477_split_sizes_0 = const()[name = string("op_4477_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4477_axis_0 = const()[name = string("op_4477_axis_0"), val = int32(-1)]; + tensor var_4477_cast_fp16_0, tensor var_4477_cast_fp16_1 = split(axis = var_4477_axis_0, split_sizes = var_4477_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_4477_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565492416)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_4477_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = tensor([0x1.36p-1])]; + tensor x_119_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_70_promoted_to_fp16)[name = string("x_119_cast_fp16")]; + 
int32 var_4492 = const()[name = string("op_4492"), val = int32(-1)]; + fp16 const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4494_cast_fp16 = mul(x = x_119_cast_fp16, y = const_71_promoted_to_fp16)[name = string("op_4494_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_4492, interleave = input_181_interleave_0, values = (x_119_cast_fp16, var_4494_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_4489_to_fp16 = const()[name = string("op_4489_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_4489_to_fp16, x = input_181_cast_fp16)[name = string("normed_173_cast_fp16")]; + tensor var_4499_split_sizes_0 = const()[name = string("op_4499_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4499_axis_0 = const()[name = string("op_4499_axis_0"), val = int32(-1)]; + tensor var_4499_cast_fp16_0, tensor var_4499_cast_fp16_1 = split(axis = var_4499_axis_0, split_sizes = var_4499_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_4499_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565497600)))]; + tensor h_37_cast_fp16 = mul(x = var_4499_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_4505 = const()[name = string("op_4505"), val = tensor([0, 2, 1])]; + tensor var_4508_axes_0 = const()[name = string("op_4508_axes_0"), val = tensor([2])]; + tensor var_4506_cast_fp16 = transpose(perm = var_4505, x = h_37_cast_fp16)[name = string("transpose_119")]; + tensor 
var_4508_cast_fp16 = expand_dims(axes = var_4508_axes_0, x = var_4506_cast_fp16)[name = string("op_4508_cast_fp16")]; + string q_73_pad_type_0 = const()[name = string("q_73_pad_type_0"), val = string("valid")]; + tensor q_73_strides_0 = const()[name = string("q_73_strides_0"), val = tensor([1, 1])]; + tensor q_73_pad_0 = const()[name = string("q_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_73_dilations_0 = const()[name = string("q_73_dilations_0"), val = tensor([1, 1])]; + int32 q_73_groups_0 = const()[name = string("q_73_groups_0"), val = int32(1)]; + tensor q_73 = conv(dilations = q_73_dilations_0, groups = q_73_groups_0, pad = q_73_pad_0, pad_type = q_73_pad_type_0, strides = q_73_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_4508_cast_fp16)[name = string("q_73")]; + tensor var_4529 = const()[name = string("op_4529"), val = tensor([1, 8, 256, 3])]; + tensor var_4530 = reshape(shape = var_4529, x = q_73)[name = string("op_4530")]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_4553 = const()[name = string("op_4553"), val = tensor([3, 8, 256])]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = var_4530)[name = string("transpose_118")]; + tensor x_121 = reshape(shape = var_4553, x = transpose_66)[name = string("x_121")]; + int32 var_4559 = const()[name = string("op_4559"), val = int32(-1)]; + fp16 const_72_promoted = const()[name = string("const_72_promoted"), val = fp16(-0x1p+0)]; + tensor var_4561 = mul(x = x_121, y = const_72_promoted)[name = string("op_4561")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_4559, interleave = input_185_interleave_0, values = (x_121, var_4561))[name = string("input_185")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_4556_to_fp16 = const()[name = 
string("op_4556_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_4556_to_fp16, x = input_185)[name = string("normed_177_cast_fp16")]; + tensor var_4566_split_sizes_0 = const()[name = string("op_4566_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4566_axis_0 = const()[name = string("op_4566_axis_0"), val = int32(-1)]; + tensor var_4566_0, tensor var_4566_1 = split(axis = var_4566_axis_0, split_sizes = var_4566_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_4566")]; + tensor q_77 = mul(x = var_4566_0, y = layers_3_self_attn_q_norm_weight)[name = string("q_77")]; + tensor var_4573 = const()[name = string("op_4573"), val = tensor([1, 3, 8, 256])]; + tensor var_4574 = reshape(shape = var_4573, x = q_77)[name = string("op_4574")]; + tensor var_4579 = const()[name = string("op_4579"), val = tensor([0, 2, 1, 3])]; + tensor q_79 = transpose(perm = var_4579, x = var_4574)[name = string("transpose_117")]; + tensor var_4581_cast_fp16 = mul(x = q_79, y = cos_s)[name = string("op_4581_cast_fp16")]; + tensor var_4582_split_sizes_0 = const()[name = string("op_4582_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4582_axis_0 = const()[name = string("op_4582_axis_0"), val = int32(-1)]; + tensor var_4582_0, tensor var_4582_1 = split(axis = var_4582_axis_0, split_sizes = var_4582_split_sizes_0, x = q_79)[name = string("op_4582")]; + fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; + tensor var_4584 = mul(x = var_4582_1, y = const_73_promoted)[name = string("op_4584")]; + int32 var_4586 = const()[name = string("op_4586"), val = int32(-1)]; + bool var_4587_interleave_0 = const()[name = string("op_4587_interleave_0"), val = bool(false)]; + tensor var_4587 = concat(axis = var_4586, interleave = var_4587_interleave_0, values = (var_4584, var_4582_0))[name = string("op_4587")]; + tensor var_4588_cast_fp16 = mul(x = var_4587, y = sin_s)[name = 
string("op_4588_cast_fp16")]; + tensor q_83_cast_fp16 = add(x = var_4581_cast_fp16, y = var_4588_cast_fp16)[name = string("q_83_cast_fp16")]; + string k_39_pad_type_0 = const()[name = string("k_39_pad_type_0"), val = string("valid")]; + tensor k_39_strides_0 = const()[name = string("k_39_strides_0"), val = tensor([1, 1])]; + tensor k_39_pad_0 = const()[name = string("k_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_39_dilations_0 = const()[name = string("k_39_dilations_0"), val = tensor([1, 1])]; + int32 k_39_groups_0 = const()[name = string("k_39_groups_0"), val = int32(1)]; + tensor k_39 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = layers_6_self_attn_k_proj_weight_palettized, x = var_4508_cast_fp16)[name = string("k_39")]; + tensor var_4606 = const()[name = string("op_4606"), val = tensor([1, 2, 256, 3])]; + tensor var_4607 = reshape(shape = var_4606, x = k_39)[name = string("op_4607")]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_15_pad_type_0 = const()[name = string("v_15_pad_type_0"), val = string("valid")]; + tensor v_15_strides_0 = const()[name = string("v_15_strides_0"), val = tensor([1, 1])]; + tensor v_15_pad_0 = const()[name = string("v_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_15_dilations_0 = const()[name = string("v_15_dilations_0"), val = tensor([1, 1])]; + int32 v_15_groups_0 = const()[name = string("v_15_groups_0"), val = int32(1)]; + tensor v_15 = conv(dilations = v_15_dilations_0, groups = v_15_groups_0, pad = v_15_pad_0, pad_type = v_15_pad_type_0, strides = v_15_strides_0, weight = layers_6_self_attn_v_proj_weight_palettized, x = var_4508_cast_fp16)[name = string("v_15")]; + tensor var_4634 = const()[name = string("op_4634"), val = tensor([1, 2, 256, 3])]; + tensor var_4635 = reshape(shape = var_4634, x = v_15)[name = string("op_4635")]; + tensor var_4640 = 
const()[name = string("op_4640"), val = tensor([0, 1, 3, 2])]; + tensor var_4658 = const()[name = string("op_4658"), val = tensor([3, 2, 256])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = var_4607)[name = string("transpose_116")]; + tensor x_123 = reshape(shape = var_4658, x = transpose_67)[name = string("x_123")]; + int32 var_4664 = const()[name = string("op_4664"), val = int32(-1)]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor var_4666 = mul(x = x_123, y = const_74_promoted)[name = string("op_4666")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_4664, interleave = input_187_interleave_0, values = (x_123, var_4666))[name = string("input_187")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_4661_to_fp16 = const()[name = string("op_4661_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_4661_to_fp16, x = input_187)[name = string("normed_181_cast_fp16")]; + tensor var_4671_split_sizes_0 = const()[name = string("op_4671_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4671_axis_0 = const()[name = string("op_4671_axis_0"), val = int32(-1)]; + tensor var_4671_0, tensor var_4671_1 = split(axis = var_4671_axis_0, split_sizes = var_4671_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_4671")]; + tensor k_43 = mul(x = var_4671_0, y = layers_6_self_attn_k_norm_weight)[name = string("k_43")]; + tensor var_4678 = const()[name = string("op_4678"), val = tensor([1, 3, 2, 256])]; + tensor var_4679 = reshape(shape = var_4678, x = k_43)[name = string("op_4679")]; + tensor var_4684 = const()[name = string("op_4684"), val = tensor([0, 2, 1, 3])]; + fp16 var_4686_promoted = const()[name = string("op_4686_promoted"), val = fp16(0x1p+1)]; + tensor var_4641 = transpose(perm = 
var_4640, x = var_4635)[name = string("transpose_115")]; + tensor var_4687 = pow(x = var_4641, y = var_4686_promoted)[name = string("op_4687")]; + tensor var_4692_axes_0 = const()[name = string("op_4692_axes_0"), val = tensor([-1])]; + bool var_4692_keep_dims_0 = const()[name = string("op_4692_keep_dims_0"), val = bool(true)]; + tensor var_4692 = reduce_mean(axes = var_4692_axes_0, keep_dims = var_4692_keep_dims_0, x = var_4687)[name = string("op_4692")]; + fp16 var_4694_to_fp16 = const()[name = string("op_4694_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_13_cast_fp16 = add(x = var_4692, y = var_4694_to_fp16)[name = string("mean_sq_13_cast_fp16")]; + fp32 var_4696_epsilon_0 = const()[name = string("op_4696_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4696_cast_fp16 = rsqrt(epsilon = var_4696_epsilon_0, x = mean_sq_13_cast_fp16)[name = string("op_4696_cast_fp16")]; + tensor input_191_cast_fp16 = mul(x = var_4641, y = var_4696_cast_fp16)[name = string("input_191_cast_fp16")]; + tensor q_81 = transpose(perm = var_4684, x = var_4679)[name = string("transpose_114")]; + tensor var_4698_cast_fp16 = mul(x = q_81, y = cos_s)[name = string("op_4698_cast_fp16")]; + tensor var_4699_split_sizes_0 = const()[name = string("op_4699_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4699_axis_0 = const()[name = string("op_4699_axis_0"), val = int32(-1)]; + tensor var_4699_0, tensor var_4699_1 = split(axis = var_4699_axis_0, split_sizes = var_4699_split_sizes_0, x = q_81)[name = string("op_4699")]; + fp16 const_75_promoted = const()[name = string("const_75_promoted"), val = fp16(-0x1p+0)]; + tensor var_4701 = mul(x = var_4699_1, y = const_75_promoted)[name = string("op_4701")]; + int32 var_4703 = const()[name = string("op_4703"), val = int32(-1)]; + bool var_4704_interleave_0 = const()[name = string("op_4704_interleave_0"), val = bool(false)]; + tensor var_4704 = concat(axis = var_4703, interleave = var_4704_interleave_0, values = (var_4701, var_4699_0))[name = 
string("op_4704")]; + tensor var_4705_cast_fp16 = mul(x = var_4704, y = sin_s)[name = string("op_4705_cast_fp16")]; + tensor input_189_cast_fp16 = add(x = var_4698_cast_fp16, y = var_4705_cast_fp16)[name = string("input_189_cast_fp16")]; + tensor k_padded_11_pad_0 = const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; + fp16 const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_11_cast_fp16 = pad(constant_val = const_76_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_189_cast_fp16)[name = string("k_padded_11_cast_fp16")]; + tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; + fp16 const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_11_cast_fp16 = pad(constant_val = const_77_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = input_191_cast_fp16)[name = string("v_padded_11_cast_fp16")]; + tensor slot_k_13_begin_0 = const()[name = string("slot_k_13_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor slot_k_13_end_0 = const()[name = string("slot_k_13_end_0"), val = tensor([6, 2, 512, 512])]; + tensor slot_k_13_end_mask_0 = const()[name = string("slot_k_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_13_cast_fp16 = slice_by_index(begin = slot_k_13_begin_0, end = slot_k_13_end_0, end_mask = slot_k_13_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("slot_k_13_cast_fp16")]; + tensor slot_v_13_begin_0 = const()[name = string("slot_v_13_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor slot_v_13_end_0 = const()[name = string("slot_v_13_end_0"), val = tensor([6, 2, 512, 512])]; + tensor slot_v_13_end_mask_0 = 
const()[name = string("slot_v_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_13_cast_fp16 = slice_by_index(begin = slot_v_13_begin_0, end = slot_v_13_end_0, end_mask = slot_v_13_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("slot_v_13_cast_fp16")]; + tensor var_4744_begin_0 = const()[name = string("op_4744_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_4744_end_0 = const()[name = string("op_4744_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4744_end_mask_0 = const()[name = string("op_4744_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4744_cast_fp16 = slice_by_index(begin = var_4744_begin_0, end = var_4744_end_0, end_mask = var_4744_end_mask_0, x = slot_k_13_cast_fp16)[name = string("op_4744_cast_fp16")]; + int32 var_4751 = const()[name = string("op_4751"), val = int32(2)]; + bool new_k_13_interleave_0 = const()[name = string("new_k_13_interleave_0"), val = bool(false)]; + tensor new_k_13_cast_fp16 = concat(axis = var_4751, interleave = new_k_13_interleave_0, values = (var_4744_cast_fp16, k_padded_11_cast_fp16))[name = string("new_k_13_cast_fp16")]; + tensor var_4767_begin_0 = const()[name = string("op_4767_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_4767_end_0 = const()[name = string("op_4767_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4767_end_mask_0 = const()[name = string("op_4767_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4767_cast_fp16 = slice_by_index(begin = var_4767_begin_0, end = var_4767_end_0, end_mask = var_4767_end_mask_0, x = slot_v_13_cast_fp16)[name = string("op_4767_cast_fp16")]; + int32 var_4774 = const()[name = string("op_4774"), val = int32(2)]; + bool new_v_13_interleave_0 = const()[name = string("new_v_13_interleave_0"), val = bool(false)]; + tensor new_v_13_cast_fp16 = concat(axis = var_4774, interleave = new_v_13_interleave_0, values = (var_4767_cast_fp16, v_padded_11_cast_fp16))[name = string("new_v_13_cast_fp16")]; + 
tensor var_4780_begin_0 = const()[name = string("op_4780_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4780_end_0 = const()[name = string("op_4780_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_4780_end_mask_0 = const()[name = string("op_4780_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4780_cast_fp16 = slice_by_index(begin = var_4780_begin_0, end = var_4780_end_0, end_mask = var_4780_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("op_4780_cast_fp16")]; + tensor var_4785_begin_0 = const()[name = string("op_4785_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4785_end_0 = const()[name = string("op_4785_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_4785_end_mask_0 = const()[name = string("op_4785_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4785_cast_fp16 = slice_by_index(begin = var_4785_begin_0, end = var_4785_end_0, end_mask = var_4785_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("op_4785_cast_fp16")]; + int32 var_4787 = const()[name = string("op_4787"), val = int32(0)]; + bool K_sliding_out_11_interleave_0 = const()[name = string("K_sliding_out_11_interleave_0"), val = bool(false)]; + tensor K_sliding_out_11_cast_fp16 = concat(axis = var_4787, interleave = K_sliding_out_11_interleave_0, values = (var_4780_cast_fp16, new_k_13_cast_fp16, var_4785_cast_fp16))[name = string("K_sliding_out_11_cast_fp16")]; + tensor var_4793_begin_0 = const()[name = string("op_4793_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4793_end_0 = const()[name = string("op_4793_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_4793_end_mask_0 = const()[name = string("op_4793_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4793_cast_fp16 = slice_by_index(begin = var_4793_begin_0, end = var_4793_end_0, end_mask = var_4793_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("op_4793_cast_fp16")]; + tensor var_4798_begin_0 = const()[name = 
string("op_4798_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4798_end_0 = const()[name = string("op_4798_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_4798_end_mask_0 = const()[name = string("op_4798_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4798_cast_fp16 = slice_by_index(begin = var_4798_begin_0, end = var_4798_end_0, end_mask = var_4798_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("op_4798_cast_fp16")]; + int32 var_4800 = const()[name = string("op_4800"), val = int32(0)]; + bool V_sliding_out_11_interleave_0 = const()[name = string("V_sliding_out_11_interleave_0"), val = bool(false)]; + tensor V_sliding_out_11_cast_fp16 = concat(axis = var_4800, interleave = V_sliding_out_11_interleave_0, values = (var_4793_cast_fp16, new_v_13_cast_fp16, var_4798_cast_fp16))[name = string("V_sliding_out_11_cast_fp16")]; + tensor var_4806_begin_0 = const()[name = string("op_4806_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4806_end_0 = const()[name = string("op_4806_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4806_end_mask_0 = const()[name = string("op_4806_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4806_cast_fp16 = slice_by_index(begin = var_4806_begin_0, end = var_4806_end_0, end_mask = var_4806_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("op_4806_cast_fp16")]; + tensor K_for_attn_13_begin_0 = const()[name = string("K_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_13_end_0 = const()[name = string("K_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_13_end_mask_0 = const()[name = string("K_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_13_cast_fp16 = slice_by_index(begin = K_for_attn_13_begin_0, end = K_for_attn_13_end_0, end_mask = K_for_attn_13_end_mask_0, x = var_4806_cast_fp16)[name = string("K_for_attn_13_cast_fp16")]; + tensor var_4816_begin_0 = const()[name = 
string("op_4816_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4816_end_0 = const()[name = string("op_4816_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4816_end_mask_0 = const()[name = string("op_4816_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4816_cast_fp16 = slice_by_index(begin = var_4816_begin_0, end = var_4816_end_0, end_mask = var_4816_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("op_4816_cast_fp16")]; + tensor V_for_attn_13_begin_0 = const()[name = string("V_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_13_end_0 = const()[name = string("V_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_13_end_mask_0 = const()[name = string("V_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_13_cast_fp16 = slice_by_index(begin = V_for_attn_13_begin_0, end = V_for_attn_13_end_0, end_mask = V_for_attn_13_end_mask_0, x = var_4816_cast_fp16)[name = string("V_for_attn_13_cast_fp16")]; + tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_12_reps_0 = const()[name = string("tile_12_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = K_for_attn_13_cast_fp16)[name = string("transpose_113")]; + tensor tile_12_cast_fp16 = tile(reps = tile_12_reps_0, x = transpose_24_cast_fp16)[name = string("tile_12_cast_fp16")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_26, x = tile_12_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = 
reshape_24_cast_fp16)[name = string("transpose_112")]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_27, x = transpose_25_cast_fp16)[name = string("reshape_25_cast_fp16")]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_13_reps_0 = const()[name = string("tile_13_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = V_for_attn_13_cast_fp16)[name = string("transpose_111")]; + tensor tile_13_cast_fp16 = tile(reps = tile_13_reps_0, x = transpose_26_cast_fp16)[name = string("tile_13_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_28, x = tile_13_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_110")]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_29, x = transpose_27_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor V_expanded_13_perm_0 = const()[name = string("V_expanded_13_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor transpose_68_cast_fp16 = transpose(perm = transpose_68_perm_0, x = reshape_25_cast_fp16)[name = string("transpose_109")]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = 
attn_weights_25_transpose_y_0, x = q_83_cast_fp16, y = transpose_68_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_127_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_127_cast_fp16)[name = string("reduce_max_6")]; + tensor var_4851 = sub(x = x_127_cast_fp16, y = reduce_max_6)[name = string("op_4851")]; + tensor var_4857 = exp(x = var_4851)[name = string("op_4857")]; + tensor var_4867_axes_0 = const()[name = string("op_4867_axes_0"), val = tensor([-1])]; + bool var_4867_keep_dims_0 = const()[name = string("op_4867_keep_dims_0"), val = bool(true)]; + tensor var_4867 = reduce_sum(axes = var_4867_axes_0, keep_dims = var_4867_keep_dims_0, x = var_4857)[name = string("op_4867")]; + tensor var_4873_cast_fp16 = real_div(x = var_4857, y = var_4867)[name = string("op_4873_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor V_expanded_13_cast_fp16 = transpose(perm = V_expanded_13_perm_0, x = reshape_27_cast_fp16)[name = string("transpose_108")]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_4873_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_4884 = const()[name = string("op_4884"), val = tensor([0, 2, 1, 3])]; + tensor var_4891 = const()[name = string("op_4891"), val = tensor([1, 3, -1])]; + tensor var_4885_cast_fp16 = transpose(perm = var_4884, x = 
attn_output_37_cast_fp16)[name = string("transpose_107")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_4891, x = var_4885_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_4896 = const()[name = string("op_4896"), val = tensor([0, 2, 1])]; + string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")]; + int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)]; + tensor var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor([1])]; + tensor var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor([0, 0])]; + tensor var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(565502784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568124288))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4897_cast_fp16 = transpose(perm = var_4896, x = attn_output_39_cast_fp16)[name = string("transpose_106")]; + tensor var_4912_cast_fp16 = conv(dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4897_cast_fp16)[name = string("op_4912_cast_fp16")]; + tensor var_4916 = const()[name = string("op_4916"), val = tensor([0, 2, 1])]; + int32 var_4922 = const()[name = string("op_4922"), val = int32(-1)]; + fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_131_cast_fp16 = transpose(perm = var_4916, x = var_4912_cast_fp16)[name = string("transpose_105")]; + tensor var_4924_cast_fp16 = mul(x = x_131_cast_fp16, y = const_78_promoted_to_fp16)[name = 
string("op_4924_cast_fp16")]; + bool input_195_interleave_0 = const()[name = string("input_195_interleave_0"), val = bool(false)]; + tensor input_195_cast_fp16 = concat(axis = var_4922, interleave = input_195_interleave_0, values = (x_131_cast_fp16, var_4924_cast_fp16))[name = string("input_195_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_4919_to_fp16 = const()[name = string("op_4919_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_4919_to_fp16, x = input_195_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor var_4929_split_sizes_0 = const()[name = string("op_4929_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4929_axis_0 = const()[name = string("op_4929_axis_0"), val = int32(-1)]; + tensor var_4929_cast_fp16_0, tensor var_4929_cast_fp16_1 = split(axis = var_4929_axis_0, split_sizes = var_4929_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_4929_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568126912)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_4929_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_119_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_133_cast_fp16")]; + int32 var_4938 = const()[name = string("op_4938"), val = int32(-1)]; + fp16 const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4940_cast_fp16 = mul(x = x_133_cast_fp16, y = const_79_promoted_to_fp16)[name = string("op_4940_cast_fp16")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + 
tensor input_197_cast_fp16 = concat(axis = var_4938, interleave = input_197_interleave_0, values = (x_133_cast_fp16, var_4940_cast_fp16))[name = string("input_197_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_4935_to_fp16 = const()[name = string("op_4935_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_4935_to_fp16, x = input_197_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_4945_split_sizes_0 = const()[name = string("op_4945_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4945_axis_0 = const()[name = string("op_4945_axis_0"), val = int32(-1)]; + tensor var_4945_cast_fp16_0, tensor var_4945_cast_fp16_1 = split(axis = var_4945_axis_0, split_sizes = var_4945_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_4945_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568132096)))]; + tensor h_39_cast_fp16 = mul(x = var_4945_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_4956 = const()[name = string("op_4956"), val = tensor([0, 2, 1])]; + tensor input_199_axes_0 = const()[name = string("input_199_axes_0"), val = tensor([2])]; + tensor var_4957 = transpose(perm = var_4956, x = h_39_cast_fp16)[name = string("transpose_104")]; + tensor input_199 = expand_dims(axes = input_199_axes_0, x = var_4957)[name = string("input_199")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_199)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_199)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_201 = mul(x = gate_27, y = up_13)[name = string("input_201")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = 
mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_201)[name = string("mlp_out_13")]; + tensor var_4997_axes_0 = const()[name = string("op_4997_axes_0"), val = tensor([2])]; + tensor var_4997 = squeeze(axes = var_4997_axes_0, x = mlp_out_13)[name = string("op_4997")]; + tensor var_5001 = const()[name = string("op_5001"), val = tensor([0, 2, 1])]; + int32 var_5007 = const()[name = string("op_5007"), val = int32(-1)]; + fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; + tensor x_135 = transpose(perm = var_5001, x = var_4997)[name = string("transpose_103")]; + tensor var_5009 = mul(x = x_135, y = const_80_promoted)[name = string("op_5009")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203 = concat(axis = var_5007, interleave = input_203_interleave_0, values = (x_135, var_5009))[name = string("input_203")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_5004_to_fp16 = const()[name = string("op_5004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_5004_to_fp16, x = input_203)[name = string("normed_193_cast_fp16")]; + tensor var_5014_split_sizes_0 = const()[name = string("op_5014_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5014_axis_0 = const()[name = string("op_5014_axis_0"), val = int32(-1)]; + tensor var_5014_0, tensor var_5014_1 = split(axis = var_5014_axis_0, split_sizes = var_5014_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_5014")]; + tensor hidden_states_63 = mul(x = var_5014_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_63)[name = 
string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 1536])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 3, 1792])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_5042 = const()[name = string("op_5042"), val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_5043 = transpose(perm = var_5042, x = hidden_states_65_cast_fp16)[name = string("transpose_102")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_5043)[name = string("input_205")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_205)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor 
var_5062 = const()[name = string("op_5062"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_5063_cast_fp16 = transpose(perm = var_5062, x = per_layer_slice_13_cast_fp16)[name = string("transpose_101")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_5063_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_207_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_207_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568137280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568465024))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_207_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_5079_axes_0 = const()[name = string("op_5079_axes_0"), val = tensor([2])]; + tensor var_5079_cast_fp16 = squeeze(axes = var_5079_axes_0, x = gated_41_cast_fp16)[name = 
string("op_5079_cast_fp16")]; + tensor var_5083 = const()[name = string("op_5083"), val = tensor([0, 2, 1])]; + int32 var_5089 = const()[name = string("op_5089"), val = int32(-1)]; + fp16 const_81_promoted_to_fp16 = const()[name = string("const_81_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_137_cast_fp16 = transpose(perm = var_5083, x = var_5079_cast_fp16)[name = string("transpose_100")]; + tensor var_5091_cast_fp16 = mul(x = x_137_cast_fp16, y = const_81_promoted_to_fp16)[name = string("op_5091_cast_fp16")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209_cast_fp16 = concat(axis = var_5089, interleave = input_209_interleave_0, values = (x_137_cast_fp16, var_5091_cast_fp16))[name = string("input_209_cast_fp16")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_5086_to_fp16 = const()[name = string("op_5086_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_5086_to_fp16, x = input_209_cast_fp16)[name = string("normed_197_cast_fp16")]; + tensor var_5096_split_sizes_0 = const()[name = string("op_5096_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5096_axis_0 = const()[name = string("op_5096_axis_0"), val = int32(-1)]; + tensor var_5096_cast_fp16_0, tensor var_5096_cast_fp16_1 = split(axis = var_5096_axis_0, split_sizes = var_5096_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_5096_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568467648)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_5096_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor 
hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = tensor([0x1.1ep-1])]; + tensor x_139_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_82_promoted_to_fp16)[name = string("x_139_cast_fp16")]; + int32 var_5111 = const()[name = string("op_5111"), val = int32(-1)]; + fp16 const_83_promoted_to_fp16 = const()[name = string("const_83_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5113_cast_fp16 = mul(x = x_139_cast_fp16, y = const_83_promoted_to_fp16)[name = string("op_5113_cast_fp16")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211_cast_fp16 = concat(axis = var_5111, interleave = input_211_interleave_0, values = (x_139_cast_fp16, var_5113_cast_fp16))[name = string("input_211_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_5108_to_fp16 = const()[name = string("op_5108_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_5108_to_fp16, x = input_211_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_5118_split_sizes_0 = const()[name = string("op_5118_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5118_axis_0 = const()[name = string("op_5118_axis_0"), val = int32(-1)]; + tensor var_5118_cast_fp16_0, tensor var_5118_cast_fp16_1 = split(axis = var_5118_axis_0, split_sizes = var_5118_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_5118_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568472832)))]; + tensor h_43_cast_fp16 = mul(x = 
var_5118_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_5124 = const()[name = string("op_5124"), val = tensor([0, 2, 1])]; + tensor var_5127_axes_0 = const()[name = string("op_5127_axes_0"), val = tensor([2])]; + tensor var_5125_cast_fp16 = transpose(perm = var_5124, x = h_43_cast_fp16)[name = string("transpose_99")]; + tensor var_5127_cast_fp16 = expand_dims(axes = var_5127_axes_0, x = var_5125_cast_fp16)[name = string("op_5127_cast_fp16")]; + string q_85_pad_type_0 = const()[name = string("q_85_pad_type_0"), val = string("valid")]; + tensor q_85_strides_0 = const()[name = string("q_85_strides_0"), val = tensor([1, 1])]; + tensor q_85_pad_0 = const()[name = string("q_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_85_dilations_0 = const()[name = string("q_85_dilations_0"), val = tensor([1, 1])]; + int32 q_85_groups_0 = const()[name = string("q_85_groups_0"), val = int32(1)]; + tensor q_85 = conv(dilations = q_85_dilations_0, groups = q_85_groups_0, pad = q_85_pad_0, pad_type = q_85_pad_type_0, strides = q_85_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_5127_cast_fp16)[name = string("q_85")]; + tensor var_5148 = const()[name = string("op_5148"), val = tensor([1, 8, 256, 3])]; + tensor var_5149 = reshape(shape = var_5148, x = q_85)[name = string("op_5149")]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_5172 = const()[name = string("op_5172"), val = tensor([3, 8, 256])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = var_5149)[name = string("transpose_98")]; + tensor x_141 = reshape(shape = var_5172, x = transpose_69)[name = string("x_141")]; + int32 var_5178 = const()[name = string("op_5178"), val = int32(-1)]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_5180 = mul(x = x_141, y = const_84_promoted)[name = 
string("op_5180")]; + bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)]; + tensor input_215 = concat(axis = var_5178, interleave = input_215_interleave_0, values = (x_141, var_5180))[name = string("input_215")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_5175_to_fp16 = const()[name = string("op_5175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_5175_to_fp16, x = input_215)[name = string("normed_205_cast_fp16")]; + tensor var_5185_split_sizes_0 = const()[name = string("op_5185_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5185_axis_0 = const()[name = string("op_5185_axis_0"), val = int32(-1)]; + tensor var_5185_0, tensor var_5185_1 = split(axis = var_5185_axis_0, split_sizes = var_5185_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_5185")]; + tensor q_89 = mul(x = var_5185_0, y = layers_7_self_attn_q_norm_weight)[name = string("q_89")]; + tensor var_5192 = const()[name = string("op_5192"), val = tensor([1, 3, 8, 256])]; + tensor var_5193 = reshape(shape = var_5192, x = q_89)[name = string("op_5193")]; + tensor var_5198 = const()[name = string("op_5198"), val = tensor([0, 2, 1, 3])]; + tensor q_91 = transpose(perm = var_5198, x = var_5193)[name = string("transpose_97")]; + tensor var_5200_cast_fp16 = mul(x = q_91, y = cos_s)[name = string("op_5200_cast_fp16")]; + tensor var_5201_split_sizes_0 = const()[name = string("op_5201_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5201_axis_0 = const()[name = string("op_5201_axis_0"), val = int32(-1)]; + tensor var_5201_0, tensor var_5201_1 = split(axis = var_5201_axis_0, split_sizes = var_5201_split_sizes_0, x = q_91)[name = string("op_5201")]; + fp16 const_85_promoted = const()[name = string("const_85_promoted"), val = fp16(-0x1p+0)]; + tensor var_5203 = mul(x = var_5201_1, y = const_85_promoted)[name = 
string("op_5203")]; + int32 var_5205 = const()[name = string("op_5205"), val = int32(-1)]; + bool var_5206_interleave_0 = const()[name = string("op_5206_interleave_0"), val = bool(false)]; + tensor var_5206 = concat(axis = var_5205, interleave = var_5206_interleave_0, values = (var_5203, var_5201_0))[name = string("op_5206")]; + tensor var_5207_cast_fp16 = mul(x = var_5206, y = sin_s)[name = string("op_5207_cast_fp16")]; + tensor q_95_cast_fp16 = add(x = var_5200_cast_fp16, y = var_5207_cast_fp16)[name = string("q_95_cast_fp16")]; + string k_45_pad_type_0 = const()[name = string("k_45_pad_type_0"), val = string("valid")]; + tensor k_45_strides_0 = const()[name = string("k_45_strides_0"), val = tensor([1, 1])]; + tensor k_45_pad_0 = const()[name = string("k_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_45_dilations_0 = const()[name = string("k_45_dilations_0"), val = tensor([1, 1])]; + int32 k_45_groups_0 = const()[name = string("k_45_groups_0"), val = int32(1)]; + tensor k_45 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = layers_7_self_attn_k_proj_weight_palettized, x = var_5127_cast_fp16)[name = string("k_45")]; + tensor var_5225 = const()[name = string("op_5225"), val = tensor([1, 2, 256, 3])]; + tensor var_5226 = reshape(shape = var_5225, x = k_45)[name = string("op_5226")]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_17_pad_type_0 = const()[name = string("v_17_pad_type_0"), val = string("valid")]; + tensor v_17_strides_0 = const()[name = string("v_17_strides_0"), val = tensor([1, 1])]; + tensor v_17_pad_0 = const()[name = string("v_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_17_dilations_0 = const()[name = string("v_17_dilations_0"), val = tensor([1, 1])]; + int32 v_17_groups_0 = const()[name = string("v_17_groups_0"), val = int32(1)]; + tensor v_17 = conv(dilations = 
v_17_dilations_0, groups = v_17_groups_0, pad = v_17_pad_0, pad_type = v_17_pad_type_0, strides = v_17_strides_0, weight = layers_7_self_attn_v_proj_weight_palettized, x = var_5127_cast_fp16)[name = string("v_17")]; + tensor var_5253 = const()[name = string("op_5253"), val = tensor([1, 2, 256, 3])]; + tensor var_5254 = reshape(shape = var_5253, x = v_17)[name = string("op_5254")]; + tensor var_5259 = const()[name = string("op_5259"), val = tensor([0, 1, 3, 2])]; + tensor var_5277 = const()[name = string("op_5277"), val = tensor([3, 2, 256])]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = var_5226)[name = string("transpose_96")]; + tensor x_143 = reshape(shape = var_5277, x = transpose_70)[name = string("x_143")]; + int32 var_5283 = const()[name = string("op_5283"), val = int32(-1)]; + fp16 const_86_promoted = const()[name = string("const_86_promoted"), val = fp16(-0x1p+0)]; + tensor var_5285 = mul(x = x_143, y = const_86_promoted)[name = string("op_5285")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217 = concat(axis = var_5283, interleave = input_217_interleave_0, values = (x_143, var_5285))[name = string("input_217")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_5280_to_fp16 = const()[name = string("op_5280_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_5280_to_fp16, x = input_217)[name = string("normed_209_cast_fp16")]; + tensor var_5290_split_sizes_0 = const()[name = string("op_5290_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5290_axis_0 = const()[name = string("op_5290_axis_0"), val = int32(-1)]; + tensor var_5290_0, tensor var_5290_1 = split(axis = var_5290_axis_0, split_sizes = var_5290_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_5290")]; + tensor k_49 = mul(x = var_5290_0, y = 
layers_0_self_attn_k_norm_weight)[name = string("k_49")]; + tensor var_5297 = const()[name = string("op_5297"), val = tensor([1, 3, 2, 256])]; + tensor var_5298 = reshape(shape = var_5297, x = k_49)[name = string("op_5298")]; + tensor var_5303 = const()[name = string("op_5303"), val = tensor([0, 2, 1, 3])]; + fp16 var_5305_promoted = const()[name = string("op_5305_promoted"), val = fp16(0x1p+1)]; + tensor var_5260 = transpose(perm = var_5259, x = var_5254)[name = string("transpose_95")]; + tensor var_5306 = pow(x = var_5260, y = var_5305_promoted)[name = string("op_5306")]; + tensor var_5311_axes_0 = const()[name = string("op_5311_axes_0"), val = tensor([-1])]; + bool var_5311_keep_dims_0 = const()[name = string("op_5311_keep_dims_0"), val = bool(true)]; + tensor var_5311 = reduce_mean(axes = var_5311_axes_0, keep_dims = var_5311_keep_dims_0, x = var_5306)[name = string("op_5311")]; + fp16 var_5313_to_fp16 = const()[name = string("op_5313_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_15_cast_fp16 = add(x = var_5311, y = var_5313_to_fp16)[name = string("mean_sq_15_cast_fp16")]; + fp32 var_5315_epsilon_0 = const()[name = string("op_5315_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5315_cast_fp16 = rsqrt(epsilon = var_5315_epsilon_0, x = mean_sq_15_cast_fp16)[name = string("op_5315_cast_fp16")]; + tensor input_221_cast_fp16 = mul(x = var_5260, y = var_5315_cast_fp16)[name = string("input_221_cast_fp16")]; + tensor q_93 = transpose(perm = var_5303, x = var_5298)[name = string("transpose_94")]; + tensor var_5317_cast_fp16 = mul(x = q_93, y = cos_s)[name = string("op_5317_cast_fp16")]; + tensor var_5318_split_sizes_0 = const()[name = string("op_5318_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5318_axis_0 = const()[name = string("op_5318_axis_0"), val = int32(-1)]; + tensor var_5318_0, tensor var_5318_1 = split(axis = var_5318_axis_0, split_sizes = var_5318_split_sizes_0, x = q_93)[name = string("op_5318")]; + fp16 const_87_promoted = 
const()[name = string("const_87_promoted"), val = fp16(-0x1p+0)]; + tensor var_5320 = mul(x = var_5318_1, y = const_87_promoted)[name = string("op_5320")]; + int32 var_5322 = const()[name = string("op_5322"), val = int32(-1)]; + bool var_5323_interleave_0 = const()[name = string("op_5323_interleave_0"), val = bool(false)]; + tensor var_5323 = concat(axis = var_5322, interleave = var_5323_interleave_0, values = (var_5320, var_5318_0))[name = string("op_5323")]; + tensor var_5324_cast_fp16 = mul(x = var_5323, y = sin_s)[name = string("op_5324_cast_fp16")]; + tensor input_219_cast_fp16 = add(x = var_5317_cast_fp16, y = var_5324_cast_fp16)[name = string("input_219_cast_fp16")]; + tensor k_padded_13_pad_0 = const()[name = string("k_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_13_mode_0 = const()[name = string("k_padded_13_mode_0"), val = string("constant")]; + fp16 const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_13_cast_fp16 = pad(constant_val = const_88_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_219_cast_fp16)[name = string("k_padded_13_cast_fp16")]; + tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; + fp16 const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_13_cast_fp16 = pad(constant_val = const_89_to_fp16, mode = v_padded_13_mode_0, pad = v_padded_13_pad_0, x = input_221_cast_fp16)[name = string("v_padded_13_cast_fp16")]; + tensor slot_k_15_begin_0 = const()[name = string("slot_k_15_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor slot_k_15_end_0 = const()[name = string("slot_k_15_end_0"), val = tensor([7, 2, 512, 512])]; + tensor slot_k_15_end_mask_0 = const()[name = string("slot_k_15_end_mask_0"), val = tensor([false, true, true, true])]; 
+ tensor slot_k_15_cast_fp16 = slice_by_index(begin = slot_k_15_begin_0, end = slot_k_15_end_0, end_mask = slot_k_15_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("slot_k_15_cast_fp16")]; + tensor slot_v_15_begin_0 = const()[name = string("slot_v_15_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor slot_v_15_end_0 = const()[name = string("slot_v_15_end_0"), val = tensor([7, 2, 512, 512])]; + tensor slot_v_15_end_mask_0 = const()[name = string("slot_v_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_15_cast_fp16 = slice_by_index(begin = slot_v_15_begin_0, end = slot_v_15_end_0, end_mask = slot_v_15_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("slot_v_15_cast_fp16")]; + tensor var_5363_begin_0 = const()[name = string("op_5363_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5363_end_0 = const()[name = string("op_5363_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5363_end_mask_0 = const()[name = string("op_5363_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5363_cast_fp16 = slice_by_index(begin = var_5363_begin_0, end = var_5363_end_0, end_mask = var_5363_end_mask_0, x = slot_k_15_cast_fp16)[name = string("op_5363_cast_fp16")]; + int32 var_5370 = const()[name = string("op_5370"), val = int32(2)]; + bool new_k_15_interleave_0 = const()[name = string("new_k_15_interleave_0"), val = bool(false)]; + tensor new_k_15_cast_fp16 = concat(axis = var_5370, interleave = new_k_15_interleave_0, values = (var_5363_cast_fp16, k_padded_13_cast_fp16))[name = string("new_k_15_cast_fp16")]; + tensor var_5386_begin_0 = const()[name = string("op_5386_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5386_end_0 = const()[name = string("op_5386_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5386_end_mask_0 = const()[name = string("op_5386_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5386_cast_fp16 = slice_by_index(begin = var_5386_begin_0, end = var_5386_end_0, end_mask = 
var_5386_end_mask_0, x = slot_v_15_cast_fp16)[name = string("op_5386_cast_fp16")]; + int32 var_5393 = const()[name = string("op_5393"), val = int32(2)]; + bool new_v_15_interleave_0 = const()[name = string("new_v_15_interleave_0"), val = bool(false)]; + tensor new_v_15_cast_fp16 = concat(axis = var_5393, interleave = new_v_15_interleave_0, values = (var_5386_cast_fp16, v_padded_13_cast_fp16))[name = string("new_v_15_cast_fp16")]; + tensor var_5399_begin_0 = const()[name = string("op_5399_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5399_end_0 = const()[name = string("op_5399_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_5399_end_mask_0 = const()[name = string("op_5399_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5399_cast_fp16 = slice_by_index(begin = var_5399_begin_0, end = var_5399_end_0, end_mask = var_5399_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("op_5399_cast_fp16")]; + tensor var_5404_begin_0 = const()[name = string("op_5404_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5404_end_0 = const()[name = string("op_5404_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_5404_end_mask_0 = const()[name = string("op_5404_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5404_cast_fp16 = slice_by_index(begin = var_5404_begin_0, end = var_5404_end_0, end_mask = var_5404_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("op_5404_cast_fp16")]; + int32 var_5406 = const()[name = string("op_5406"), val = int32(0)]; + bool K_sliding_out_13_interleave_0 = const()[name = string("K_sliding_out_13_interleave_0"), val = bool(false)]; + tensor K_sliding_out_13_cast_fp16 = concat(axis = var_5406, interleave = K_sliding_out_13_interleave_0, values = (var_5399_cast_fp16, new_k_15_cast_fp16, var_5404_cast_fp16))[name = string("K_sliding_out_13_cast_fp16")]; + tensor var_5412_begin_0 = const()[name = string("op_5412_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5412_end_0 = const()[name 
= string("op_5412_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_5412_end_mask_0 = const()[name = string("op_5412_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5412_cast_fp16 = slice_by_index(begin = var_5412_begin_0, end = var_5412_end_0, end_mask = var_5412_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("op_5412_cast_fp16")]; + tensor var_5417_begin_0 = const()[name = string("op_5417_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5417_end_0 = const()[name = string("op_5417_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_5417_end_mask_0 = const()[name = string("op_5417_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5417_cast_fp16 = slice_by_index(begin = var_5417_begin_0, end = var_5417_end_0, end_mask = var_5417_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("op_5417_cast_fp16")]; + int32 var_5419 = const()[name = string("op_5419"), val = int32(0)]; + bool V_sliding_out_13_interleave_0 = const()[name = string("V_sliding_out_13_interleave_0"), val = bool(false)]; + tensor V_sliding_out_13_cast_fp16 = concat(axis = var_5419, interleave = V_sliding_out_13_interleave_0, values = (var_5412_cast_fp16, new_v_15_cast_fp16, var_5417_cast_fp16))[name = string("V_sliding_out_13_cast_fp16")]; + tensor var_5425_begin_0 = const()[name = string("op_5425_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5425_end_0 = const()[name = string("op_5425_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5425_end_mask_0 = const()[name = string("op_5425_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5425_cast_fp16 = slice_by_index(begin = var_5425_begin_0, end = var_5425_end_0, end_mask = var_5425_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("op_5425_cast_fp16")]; + tensor K_for_attn_15_begin_0 = const()[name = string("K_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_15_end_0 = const()[name = string("K_for_attn_15_end_0"), val = 
tensor([1, 2, 512, 256])]; + tensor K_for_attn_15_end_mask_0 = const()[name = string("K_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_15_cast_fp16 = slice_by_index(begin = K_for_attn_15_begin_0, end = K_for_attn_15_end_0, end_mask = K_for_attn_15_end_mask_0, x = var_5425_cast_fp16)[name = string("K_for_attn_15_cast_fp16")]; + tensor var_5435_begin_0 = const()[name = string("op_5435_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5435_end_0 = const()[name = string("op_5435_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5435_end_mask_0 = const()[name = string("op_5435_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5435_cast_fp16 = slice_by_index(begin = var_5435_begin_0, end = var_5435_end_0, end_mask = var_5435_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5435_cast_fp16")]; + tensor V_for_attn_15_begin_0 = const()[name = string("V_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_15_end_0 = const()[name = string("V_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_15_end_mask_0 = const()[name = string("V_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_15_cast_fp16 = slice_by_index(begin = V_for_attn_15_begin_0, end = V_for_attn_15_end_0, end_mask = V_for_attn_15_end_mask_0, x = var_5435_cast_fp16)[name = string("V_for_attn_15_cast_fp16")]; + tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_14_reps_0 = const()[name = string("tile_14_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = K_for_attn_15_cast_fp16)[name = string("transpose_93")]; + tensor tile_14_cast_fp16 = tile(reps = tile_14_reps_0, x = transpose_28_cast_fp16)[name = string("tile_14_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 2, 1, 512, 256])]; + tensor 
reshape_28_cast_fp16 = reshape(shape = concat_30, x = tile_14_cast_fp16)[name = string("reshape_28_cast_fp16")]; + tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = reshape_28_cast_fp16)[name = string("transpose_92")]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_31, x = transpose_29_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_15_reps_0 = const()[name = string("tile_15_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_30_cast_fp16 = transpose(perm = transpose_30_perm_0, x = V_for_attn_15_cast_fp16)[name = string("transpose_91")]; + tensor tile_15_cast_fp16 = tile(reps = tile_15_reps_0, x = transpose_30_cast_fp16)[name = string("tile_15_cast_fp16")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_32, x = tile_15_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_33 = const()[name = string("concat_33"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_31_cast_fp16 = transpose(perm = transpose_31_perm_0, x = reshape_30_cast_fp16)[name = string("transpose_90")]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_33, x = transpose_31_cast_fp16)[name = string("reshape_31_cast_fp16")]; + tensor V_expanded_15_perm_0 = const()[name = string("V_expanded_15_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_29_transpose_x_0 = const()[name = 
string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor transpose_71_cast_fp16 = transpose(perm = transpose_71_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_89")]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_95_cast_fp16, y = transpose_71_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_147_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_7")]; + tensor var_5470 = sub(x = x_147_cast_fp16, y = reduce_max_7)[name = string("op_5470")]; + tensor var_5476 = exp(x = var_5470)[name = string("op_5476")]; + tensor var_5486_axes_0 = const()[name = string("op_5486_axes_0"), val = tensor([-1])]; + bool var_5486_keep_dims_0 = const()[name = string("op_5486_keep_dims_0"), val = bool(true)]; + tensor var_5486 = reduce_sum(axes = var_5486_axes_0, keep_dims = var_5486_keep_dims_0, x = var_5476)[name = string("op_5486")]; + tensor var_5492_cast_fp16 = real_div(x = var_5476, y = var_5486)[name = string("op_5492_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor V_expanded_15_cast_fp16 = transpose(perm = V_expanded_15_perm_0, x = reshape_31_cast_fp16)[name = string("transpose_88")]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = 
attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_5492_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_5503 = const()[name = string("op_5503"), val = tensor([0, 2, 1, 3])]; + tensor var_5510 = const()[name = string("op_5510"), val = tensor([1, 3, -1])]; + tensor var_5504_cast_fp16 = transpose(perm = var_5503, x = attn_output_43_cast_fp16)[name = string("transpose_87")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_5510, x = var_5504_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_5515 = const()[name = string("op_5515"), val = tensor([0, 2, 1])]; + string var_5531_pad_type_0 = const()[name = string("op_5531_pad_type_0"), val = string("valid")]; + int32 var_5531_groups_0 = const()[name = string("op_5531_groups_0"), val = int32(1)]; + tensor var_5531_strides_0 = const()[name = string("op_5531_strides_0"), val = tensor([1])]; + tensor var_5531_pad_0 = const()[name = string("op_5531_pad_0"), val = tensor([0, 0])]; + tensor var_5531_dilations_0 = const()[name = string("op_5531_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568478016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571099520))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5516_cast_fp16 = transpose(perm = var_5515, x = attn_output_45_cast_fp16)[name = string("transpose_86")]; + tensor var_5531_cast_fp16 = conv(dilations = var_5531_dilations_0, groups = var_5531_groups_0, pad = var_5531_pad_0, pad_type = var_5531_pad_type_0, strides = var_5531_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5516_cast_fp16)[name = string("op_5531_cast_fp16")]; + tensor var_5535 = const()[name = string("op_5535"), val = tensor([0, 2, 1])]; + int32 
var_5541 = const()[name = string("op_5541"), val = int32(-1)]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_151_cast_fp16 = transpose(perm = var_5535, x = var_5531_cast_fp16)[name = string("transpose_85")]; + tensor var_5543_cast_fp16 = mul(x = x_151_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_5543_cast_fp16")]; + bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; + tensor input_225_cast_fp16 = concat(axis = var_5541, interleave = input_225_interleave_0, values = (x_151_cast_fp16, var_5543_cast_fp16))[name = string("input_225_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_5538_to_fp16 = const()[name = string("op_5538_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_5538_to_fp16, x = input_225_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_5548_split_sizes_0 = const()[name = string("op_5548_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5548_axis_0 = const()[name = string("op_5548_axis_0"), val = int32(-1)]; + tensor var_5548_cast_fp16_0, tensor var_5548_cast_fp16_1 = split(axis = var_5548_axis_0, split_sizes = var_5548_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_5548_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571102144)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_5548_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_139_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_153_cast_fp16")]; + int32 var_5557 = const()[name = 
string("op_5557"), val = int32(-1)]; + fp16 const_91_promoted_to_fp16 = const()[name = string("const_91_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5559_cast_fp16 = mul(x = x_153_cast_fp16, y = const_91_promoted_to_fp16)[name = string("op_5559_cast_fp16")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227_cast_fp16 = concat(axis = var_5557, interleave = input_227_interleave_0, values = (x_153_cast_fp16, var_5559_cast_fp16))[name = string("input_227_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_5554_to_fp16, x = input_227_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor var_5564_split_sizes_0 = const()[name = string("op_5564_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5564_axis_0 = const()[name = string("op_5564_axis_0"), val = int32(-1)]; + tensor var_5564_cast_fp16_0, tensor var_5564_cast_fp16_1 = split(axis = var_5564_axis_0, split_sizes = var_5564_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_5564_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107328)))]; + tensor h_45_cast_fp16 = mul(x = var_5564_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_5575 = const()[name = string("op_5575"), val = tensor([0, 2, 1])]; + tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; + tensor var_5576 = transpose(perm = var_5575, x = h_45_cast_fp16)[name = string("transpose_84")]; + tensor input_229 = 
expand_dims(axes = input_229_axes_0, x = var_5576)[name = string("input_229")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_229)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_229)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_231 = mul(x = gate_31, y = up_15)[name = string("input_231")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor 
mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_231)[name = string("mlp_out_15")]; + tensor var_5616_axes_0 = const()[name = string("op_5616_axes_0"), val = tensor([2])]; + tensor var_5616 = squeeze(axes = var_5616_axes_0, x = mlp_out_15)[name = string("op_5616")]; + tensor var_5620 = const()[name = string("op_5620"), val = tensor([0, 2, 1])]; + int32 var_5626 = const()[name = string("op_5626"), val = int32(-1)]; + fp16 const_92_promoted = const()[name = string("const_92_promoted"), val = fp16(-0x1p+0)]; + tensor x_155 = transpose(perm = var_5620, x = var_5616)[name = string("transpose_83")]; + tensor var_5628 = mul(x = x_155, y = const_92_promoted)[name = string("op_5628")]; + bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; + tensor input_233 = concat(axis = var_5626, interleave = input_233_interleave_0, values = (x_155, var_5628))[name = string("input_233")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_5623_to_fp16 = const()[name = string("op_5623_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_5623_to_fp16, x = input_233)[name = string("normed_221_cast_fp16")]; + tensor var_5633_split_sizes_0 = const()[name = string("op_5633_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5633_axis_0 = const()[name = string("op_5633_axis_0"), val = int32(-1)]; + tensor var_5633_0, tensor var_5633_1 = split(axis = 
var_5633_axis_0, split_sizes = var_5633_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_5633")]; + tensor hidden_states_73 = mul(x = var_5633_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 1792])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 3, 2048])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_5661 = const()[name = string("op_5661"), val = tensor([0, 2, 1])]; + tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; + tensor var_5662 = transpose(perm = var_5661, x = hidden_states_75_cast_fp16)[name = string("transpose_82")]; + tensor input_235 = expand_dims(axes = input_235_axes_0, x = var_5662)[name = string("input_235")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides 
= gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_235)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_5681 = const()[name = string("op_5681"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_5682_cast_fp16 = transpose(perm = var_5681, x = per_layer_slice_15_cast_fp16)[name = string("transpose_81")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_5682_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_237_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_237_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571112512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571440256))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, 
weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_5698_axes_0 = const()[name = string("op_5698_axes_0"), val = tensor([2])]; + tensor var_5698_cast_fp16 = squeeze(axes = var_5698_axes_0, x = gated_47_cast_fp16)[name = string("op_5698_cast_fp16")]; + tensor var_5702 = const()[name = string("op_5702"), val = tensor([0, 2, 1])]; + int32 var_5708 = const()[name = string("op_5708"), val = int32(-1)]; + fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_157_cast_fp16 = transpose(perm = var_5702, x = var_5698_cast_fp16)[name = string("transpose_80")]; + tensor var_5710_cast_fp16 = mul(x = x_157_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_5710_cast_fp16")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239_cast_fp16 = concat(axis = var_5708, interleave = input_239_interleave_0, values = (x_157_cast_fp16, var_5710_cast_fp16))[name = string("input_239_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_5705_to_fp16 = const()[name = string("op_5705_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_5705_to_fp16, x = input_239_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor var_5715_split_sizes_0 = const()[name = string("op_5715_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5715_axis_0 = const()[name = string("op_5715_axis_0"), val = int32(-1)]; + tensor var_5715_cast_fp16_0, tensor var_5715_cast_fp16_1 = split(axis = var_5715_axis_0, split_sizes = var_5715_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_5715_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571442880)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_5715_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = tensor([0x1.58p-1])]; + tensor x_159_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_94_promoted_to_fp16)[name = string("x_159_cast_fp16")]; + int32 var_5730 = const()[name = string("op_5730"), val = int32(-1)]; + fp16 const_95_promoted_to_fp16 = const()[name = string("const_95_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5732_cast_fp16 = mul(x = x_159_cast_fp16, y = const_95_promoted_to_fp16)[name = string("op_5732_cast_fp16")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241_cast_fp16 = concat(axis = var_5730, interleave = input_241_interleave_0, values = (x_159_cast_fp16, var_5732_cast_fp16))[name = string("input_241_cast_fp16")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_5727_to_fp16 = const()[name = string("op_5727_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_5727_to_fp16, x = input_241_cast_fp16)[name = string("normed_229_cast_fp16")]; + tensor var_5737_split_sizes_0 = const()[name = string("op_5737_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5737_axis_0 = const()[name = string("op_5737_axis_0"), val = int32(-1)]; + tensor var_5737_cast_fp16_0, tensor var_5737_cast_fp16_1 = split(axis = var_5737_axis_0, split_sizes = 
var_5737_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_5737_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571448064)))]; + tensor h_49_cast_fp16 = mul(x = var_5737_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_5743 = const()[name = string("op_5743"), val = tensor([0, 2, 1])]; + tensor var_5746_axes_0 = const()[name = string("op_5746_axes_0"), val = tensor([2])]; + tensor var_5744_cast_fp16 = transpose(perm = var_5743, x = h_49_cast_fp16)[name = string("transpose_79")]; + tensor var_5746_cast_fp16 = expand_dims(axes = var_5746_axes_0, x = var_5744_cast_fp16)[name = string("op_5746_cast_fp16")]; + string q_97_pad_type_0 = const()[name = string("q_97_pad_type_0"), val = string("valid")]; + tensor q_97_strides_0 = const()[name = string("q_97_strides_0"), val = tensor([1, 1])]; + tensor q_97_pad_0 = const()[name = string("q_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_97_dilations_0 = const()[name = string("q_97_dilations_0"), val = tensor([1, 1])]; + int32 q_97_groups_0 = const()[name = string("q_97_groups_0"), val = int32(1)]; + tensor q_97 = conv(dilations = q_97_dilations_0, groups = q_97_groups_0, pad = q_97_pad_0, pad_type = q_97_pad_type_0, strides = q_97_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_5746_cast_fp16)[name = string("q_97")]; + tensor var_5767 = const()[name = string("op_5767"), val = tensor([1, 8, 256, 3])]; + tensor var_5768 = reshape(shape = var_5767, x = q_97)[name = string("op_5768")]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_5791 = const()[name = string("op_5791"), val = tensor([3, 8, 256])]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = 
var_5768)[name = string("transpose_78")]; + tensor x_161 = reshape(shape = var_5791, x = transpose_72)[name = string("x_161")]; + int32 var_5797 = const()[name = string("op_5797"), val = int32(-1)]; + fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; + tensor var_5799 = mul(x = x_161, y = const_96_promoted)[name = string("op_5799")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245 = concat(axis = var_5797, interleave = input_245_interleave_0, values = (x_161, var_5799))[name = string("input_245")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_5794_to_fp16 = const()[name = string("op_5794_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_5794_to_fp16, x = input_245)[name = string("normed_233_cast_fp16")]; + tensor var_5804_split_sizes_0 = const()[name = string("op_5804_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5804_axis_0 = const()[name = string("op_5804_axis_0"), val = int32(-1)]; + tensor var_5804_0, tensor var_5804_1 = split(axis = var_5804_axis_0, split_sizes = var_5804_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_5804")]; + tensor q_101 = mul(x = var_5804_0, y = layers_8_self_attn_q_norm_weight)[name = string("q_101")]; + tensor var_5811 = const()[name = string("op_5811"), val = tensor([1, 3, 8, 256])]; + tensor var_5812 = reshape(shape = var_5811, x = q_101)[name = string("op_5812")]; + tensor var_5817 = const()[name = string("op_5817"), val = tensor([0, 2, 1, 3])]; + tensor q_103 = transpose(perm = var_5817, x = var_5812)[name = string("transpose_77")]; + tensor var_5819_cast_fp16 = mul(x = q_103, y = cos_s)[name = string("op_5819_cast_fp16")]; + tensor var_5820_split_sizes_0 = const()[name = string("op_5820_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5820_axis_0 = const()[name = 
string("op_5820_axis_0"), val = int32(-1)]; + tensor var_5820_0, tensor var_5820_1 = split(axis = var_5820_axis_0, split_sizes = var_5820_split_sizes_0, x = q_103)[name = string("op_5820")]; + fp16 const_97_promoted = const()[name = string("const_97_promoted"), val = fp16(-0x1p+0)]; + tensor var_5822 = mul(x = var_5820_1, y = const_97_promoted)[name = string("op_5822")]; + int32 var_5824 = const()[name = string("op_5824"), val = int32(-1)]; + bool var_5825_interleave_0 = const()[name = string("op_5825_interleave_0"), val = bool(false)]; + tensor var_5825 = concat(axis = var_5824, interleave = var_5825_interleave_0, values = (var_5822, var_5820_0))[name = string("op_5825")]; + tensor var_5826_cast_fp16 = mul(x = var_5825, y = sin_s)[name = string("op_5826_cast_fp16")]; + tensor q_107_cast_fp16 = add(x = var_5819_cast_fp16, y = var_5826_cast_fp16)[name = string("q_107_cast_fp16")]; + string k_51_pad_type_0 = const()[name = string("k_51_pad_type_0"), val = string("valid")]; + tensor k_51_strides_0 = const()[name = string("k_51_strides_0"), val = tensor([1, 1])]; + tensor k_51_pad_0 = const()[name = string("k_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_51_dilations_0 = const()[name = string("k_51_dilations_0"), val = tensor([1, 1])]; + int32 k_51_groups_0 = const()[name = string("k_51_groups_0"), val = int32(1)]; + tensor k_51 = conv(dilations = k_51_dilations_0, groups = k_51_groups_0, pad = k_51_pad_0, pad_type = k_51_pad_type_0, strides = k_51_strides_0, weight = layers_8_self_attn_k_proj_weight_palettized, x = var_5746_cast_fp16)[name = string("k_51")]; + tensor var_5844 = const()[name = string("op_5844"), val = tensor([1, 2, 256, 3])]; + tensor var_5845 = reshape(shape = var_5844, x = k_51)[name = string("op_5845")]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_19_pad_type_0 = const()[name = string("v_19_pad_type_0"), val = string("valid")]; + tensor v_19_strides_0 = const()[name = 
string("v_19_strides_0"), val = tensor([1, 1])]; + tensor v_19_pad_0 = const()[name = string("v_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_19_dilations_0 = const()[name = string("v_19_dilations_0"), val = tensor([1, 1])]; + int32 v_19_groups_0 = const()[name = string("v_19_groups_0"), val = int32(1)]; + tensor v_19 = conv(dilations = v_19_dilations_0, groups = v_19_groups_0, pad = v_19_pad_0, pad_type = v_19_pad_type_0, strides = v_19_strides_0, weight = layers_8_self_attn_v_proj_weight_palettized, x = var_5746_cast_fp16)[name = string("v_19")]; + tensor var_5872 = const()[name = string("op_5872"), val = tensor([1, 2, 256, 3])]; + tensor var_5873 = reshape(shape = var_5872, x = v_19)[name = string("op_5873")]; + tensor var_5878 = const()[name = string("op_5878"), val = tensor([0, 1, 3, 2])]; + tensor var_5896 = const()[name = string("op_5896"), val = tensor([3, 2, 256])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = var_5845)[name = string("transpose_76")]; + tensor x_163 = reshape(shape = var_5896, x = transpose_73)[name = string("x_163")]; + int32 var_5902 = const()[name = string("op_5902"), val = int32(-1)]; + fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; + tensor var_5904 = mul(x = x_163, y = const_98_promoted)[name = string("op_5904")]; + bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; + tensor input_247 = concat(axis = var_5902, interleave = input_247_interleave_0, values = (x_163, var_5904))[name = string("input_247")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_5899_to_fp16 = const()[name = string("op_5899_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_5899_to_fp16, x = input_247)[name = string("normed_237_cast_fp16")]; + tensor var_5909_split_sizes_0 = const()[name = string("op_5909_split_sizes_0"), 
val = tensor([256, 256])]; + int32 var_5909_axis_0 = const()[name = string("op_5909_axis_0"), val = int32(-1)]; + tensor var_5909_0, tensor var_5909_1 = split(axis = var_5909_axis_0, split_sizes = var_5909_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_5909")]; + tensor k_55 = mul(x = var_5909_0, y = layers_8_self_attn_k_norm_weight)[name = string("k_55")]; + tensor var_5916 = const()[name = string("op_5916"), val = tensor([1, 3, 2, 256])]; + tensor var_5917 = reshape(shape = var_5916, x = k_55)[name = string("op_5917")]; + tensor var_5922 = const()[name = string("op_5922"), val = tensor([0, 2, 1, 3])]; + fp16 var_5924_promoted = const()[name = string("op_5924_promoted"), val = fp16(0x1p+1)]; + tensor var_5879 = transpose(perm = var_5878, x = var_5873)[name = string("transpose_75")]; + tensor var_5925 = pow(x = var_5879, y = var_5924_promoted)[name = string("op_5925")]; + tensor var_5930_axes_0 = const()[name = string("op_5930_axes_0"), val = tensor([-1])]; + bool var_5930_keep_dims_0 = const()[name = string("op_5930_keep_dims_0"), val = bool(true)]; + tensor var_5930 = reduce_mean(axes = var_5930_axes_0, keep_dims = var_5930_keep_dims_0, x = var_5925)[name = string("op_5930")]; + fp16 var_5932_to_fp16 = const()[name = string("op_5932_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_17_cast_fp16 = add(x = var_5930, y = var_5932_to_fp16)[name = string("mean_sq_17_cast_fp16")]; + fp32 var_5934_epsilon_0 = const()[name = string("op_5934_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5934_cast_fp16 = rsqrt(epsilon = var_5934_epsilon_0, x = mean_sq_17_cast_fp16)[name = string("op_5934_cast_fp16")]; + tensor input_251_cast_fp16 = mul(x = var_5879, y = var_5934_cast_fp16)[name = string("input_251_cast_fp16")]; + tensor q_105 = transpose(perm = var_5922, x = var_5917)[name = string("transpose_74")]; + tensor var_5936_cast_fp16 = mul(x = q_105, y = cos_s)[name = string("op_5936_cast_fp16")]; + tensor var_5937_split_sizes_0 = const()[name = 
string("op_5937_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5937_axis_0 = const()[name = string("op_5937_axis_0"), val = int32(-1)]; + tensor var_5937_0, tensor var_5937_1 = split(axis = var_5937_axis_0, split_sizes = var_5937_split_sizes_0, x = q_105)[name = string("op_5937")]; + fp16 const_99_promoted = const()[name = string("const_99_promoted"), val = fp16(-0x1p+0)]; + tensor var_5939 = mul(x = var_5937_1, y = const_99_promoted)[name = string("op_5939")]; + int32 var_5941 = const()[name = string("op_5941"), val = int32(-1)]; + bool var_5942_interleave_0 = const()[name = string("op_5942_interleave_0"), val = bool(false)]; + tensor var_5942 = concat(axis = var_5941, interleave = var_5942_interleave_0, values = (var_5939, var_5937_0))[name = string("op_5942")]; + tensor var_5943_cast_fp16 = mul(x = var_5942, y = sin_s)[name = string("op_5943_cast_fp16")]; + tensor input_249_cast_fp16 = add(x = var_5936_cast_fp16, y = var_5943_cast_fp16)[name = string("input_249_cast_fp16")]; + tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; + fp16 const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_15_cast_fp16 = pad(constant_val = const_100_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_249_cast_fp16)[name = string("k_padded_15_cast_fp16")]; + tensor v_padded_15_pad_0 = const()[name = string("v_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; + fp16 const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_15_cast_fp16 = pad(constant_val = const_101_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = input_251_cast_fp16)[name = string("v_padded_15_cast_fp16")]; 
+ tensor slot_k_17_begin_0 = const()[name = string("slot_k_17_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor slot_k_17_end_0 = const()[name = string("slot_k_17_end_0"), val = tensor([8, 2, 512, 512])]; + tensor slot_k_17_end_mask_0 = const()[name = string("slot_k_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_17_cast_fp16 = slice_by_index(begin = slot_k_17_begin_0, end = slot_k_17_end_0, end_mask = slot_k_17_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("slot_k_17_cast_fp16")]; + tensor slot_v_17_begin_0 = const()[name = string("slot_v_17_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor slot_v_17_end_0 = const()[name = string("slot_v_17_end_0"), val = tensor([8, 2, 512, 512])]; + tensor slot_v_17_end_mask_0 = const()[name = string("slot_v_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_17_cast_fp16 = slice_by_index(begin = slot_v_17_begin_0, end = slot_v_17_end_0, end_mask = slot_v_17_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("slot_v_17_cast_fp16")]; + tensor var_5982_begin_0 = const()[name = string("op_5982_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5982_end_0 = const()[name = string("op_5982_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5982_end_mask_0 = const()[name = string("op_5982_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5982_cast_fp16 = slice_by_index(begin = var_5982_begin_0, end = var_5982_end_0, end_mask = var_5982_end_mask_0, x = slot_k_17_cast_fp16)[name = string("op_5982_cast_fp16")]; + int32 var_5989 = const()[name = string("op_5989"), val = int32(2)]; + bool new_k_17_interleave_0 = const()[name = string("new_k_17_interleave_0"), val = bool(false)]; + tensor new_k_17_cast_fp16 = concat(axis = var_5989, interleave = new_k_17_interleave_0, values = (var_5982_cast_fp16, k_padded_15_cast_fp16))[name = string("new_k_17_cast_fp16")]; + tensor var_6005_begin_0 = const()[name = string("op_6005_begin_0"), val = tensor([0, 0, 3, 
0])]; + tensor var_6005_end_0 = const()[name = string("op_6005_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6005_end_mask_0 = const()[name = string("op_6005_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6005_cast_fp16 = slice_by_index(begin = var_6005_begin_0, end = var_6005_end_0, end_mask = var_6005_end_mask_0, x = slot_v_17_cast_fp16)[name = string("op_6005_cast_fp16")]; + int32 var_6012 = const()[name = string("op_6012"), val = int32(2)]; + bool new_v_17_interleave_0 = const()[name = string("new_v_17_interleave_0"), val = bool(false)]; + tensor new_v_17_cast_fp16 = concat(axis = var_6012, interleave = new_v_17_interleave_0, values = (var_6005_cast_fp16, v_padded_15_cast_fp16))[name = string("new_v_17_cast_fp16")]; + tensor var_6018_begin_0 = const()[name = string("op_6018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6018_end_0 = const()[name = string("op_6018_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_6018_end_mask_0 = const()[name = string("op_6018_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6018_cast_fp16 = slice_by_index(begin = var_6018_begin_0, end = var_6018_end_0, end_mask = var_6018_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("op_6018_cast_fp16")]; + tensor var_6023_begin_0 = const()[name = string("op_6023_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6023_end_0 = const()[name = string("op_6023_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6023_end_mask_0 = const()[name = string("op_6023_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6023_cast_fp16 = slice_by_index(begin = var_6023_begin_0, end = var_6023_end_0, end_mask = var_6023_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("op_6023_cast_fp16")]; + int32 var_6025 = const()[name = string("op_6025"), val = int32(0)]; + bool K_sliding_out_15_interleave_0 = const()[name = string("K_sliding_out_15_interleave_0"), val = bool(false)]; + tensor 
K_sliding_out_15_cast_fp16 = concat(axis = var_6025, interleave = K_sliding_out_15_interleave_0, values = (var_6018_cast_fp16, new_k_17_cast_fp16, var_6023_cast_fp16))[name = string("K_sliding_out_15_cast_fp16")]; + tensor var_6031_begin_0 = const()[name = string("op_6031_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6031_end_0 = const()[name = string("op_6031_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_6031_end_mask_0 = const()[name = string("op_6031_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6031_cast_fp16 = slice_by_index(begin = var_6031_begin_0, end = var_6031_end_0, end_mask = var_6031_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("op_6031_cast_fp16")]; + tensor var_6036_begin_0 = const()[name = string("op_6036_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6036_end_0 = const()[name = string("op_6036_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6036_end_mask_0 = const()[name = string("op_6036_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6036_cast_fp16 = slice_by_index(begin = var_6036_begin_0, end = var_6036_end_0, end_mask = var_6036_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("op_6036_cast_fp16")]; + int32 var_6038 = const()[name = string("op_6038"), val = int32(0)]; + bool V_sliding_out_15_interleave_0 = const()[name = string("V_sliding_out_15_interleave_0"), val = bool(false)]; + tensor V_sliding_out_15_cast_fp16 = concat(axis = var_6038, interleave = V_sliding_out_15_interleave_0, values = (var_6031_cast_fp16, new_v_17_cast_fp16, var_6036_cast_fp16))[name = string("V_sliding_out_15_cast_fp16")]; + tensor var_6044_begin_0 = const()[name = string("op_6044_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_6044_end_0 = const()[name = string("op_6044_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_6044_end_mask_0 = const()[name = string("op_6044_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6044_cast_fp16 = 
slice_by_index(begin = var_6044_begin_0, end = var_6044_end_0, end_mask = var_6044_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("op_6044_cast_fp16")]; + tensor K_for_attn_17_begin_0 = const()[name = string("K_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_17_end_0 = const()[name = string("K_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_17_end_mask_0 = const()[name = string("K_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_17_cast_fp16 = slice_by_index(begin = K_for_attn_17_begin_0, end = K_for_attn_17_end_0, end_mask = K_for_attn_17_end_mask_0, x = var_6044_cast_fp16)[name = string("K_for_attn_17_cast_fp16")]; + tensor var_6054_begin_0 = const()[name = string("op_6054_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_6054_end_0 = const()[name = string("op_6054_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_6054_end_mask_0 = const()[name = string("op_6054_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6054_cast_fp16 = slice_by_index(begin = var_6054_begin_0, end = var_6054_end_0, end_mask = var_6054_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("op_6054_cast_fp16")]; + tensor V_for_attn_17_begin_0 = const()[name = string("V_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_17_end_0 = const()[name = string("V_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_17_end_mask_0 = const()[name = string("V_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_17_cast_fp16 = slice_by_index(begin = V_for_attn_17_begin_0, end = V_for_attn_17_end_0, end_mask = V_for_attn_17_end_mask_0, x = var_6054_cast_fp16)[name = string("V_for_attn_17_cast_fp16")]; + tensor transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_16_reps_0 = const()[name = string("tile_16_reps_0"), val = tensor([4, 1, 1, 1])]; + 
tensor transpose_32_cast_fp16 = transpose(perm = transpose_32_perm_0, x = K_for_attn_17_cast_fp16)[name = string("transpose_73")]; + tensor tile_16_cast_fp16 = tile(reps = tile_16_reps_0, x = transpose_32_cast_fp16)[name = string("tile_16_cast_fp16")]; + tensor concat_34 = const()[name = string("concat_34"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_34, x = tile_16_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_72")]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_35, x = transpose_33_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_17_reps_0 = const()[name = string("tile_17_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_34_cast_fp16 = transpose(perm = transpose_34_perm_0, x = V_for_attn_17_cast_fp16)[name = string("transpose_71")]; + tensor tile_17_cast_fp16 = tile(reps = tile_17_reps_0, x = transpose_34_cast_fp16)[name = string("tile_17_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_36, x = tile_17_cast_fp16)[name = string("reshape_34_cast_fp16")]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_35_cast_fp16 = transpose(perm = transpose_35_perm_0, x = 
reshape_34_cast_fp16)[name = string("transpose_70")]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_37, x = transpose_35_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor V_expanded_17_perm_0 = const()[name = string("V_expanded_17_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor transpose_74_cast_fp16 = transpose(perm = transpose_74_perm_0, x = reshape_33_cast_fp16)[name = string("transpose_69")]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_107_cast_fp16, y = transpose_74_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_167_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_167_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_167_cast_fp16)[name = string("reduce_max_8")]; + tensor var_6089 = sub(x = x_167_cast_fp16, y = reduce_max_8)[name = string("op_6089")]; + tensor var_6095 = exp(x = var_6089)[name = string("op_6095")]; + tensor var_6105_axes_0 = const()[name = string("op_6105_axes_0"), val = tensor([-1])]; + bool var_6105_keep_dims_0 = const()[name = string("op_6105_keep_dims_0"), val = bool(true)]; + tensor var_6105 = reduce_sum(axes = var_6105_axes_0, keep_dims = var_6105_keep_dims_0, x = var_6095)[name = string("op_6105")]; + tensor var_6111_cast_fp16 = real_div(x = var_6095, y = var_6105)[name = string("op_6111_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = 
string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor V_expanded_17_cast_fp16 = transpose(perm = V_expanded_17_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_68")]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_6111_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_6122 = const()[name = string("op_6122"), val = tensor([0, 2, 1, 3])]; + tensor var_6129 = const()[name = string("op_6129"), val = tensor([1, 3, -1])]; + tensor var_6123_cast_fp16 = transpose(perm = var_6122, x = attn_output_49_cast_fp16)[name = string("transpose_67")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_6129, x = var_6123_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_6134 = const()[name = string("op_6134"), val = tensor([0, 2, 1])]; + string var_6150_pad_type_0 = const()[name = string("op_6150_pad_type_0"), val = string("valid")]; + int32 var_6150_groups_0 = const()[name = string("op_6150_groups_0"), val = int32(1)]; + tensor var_6150_strides_0 = const()[name = string("op_6150_strides_0"), val = tensor([1])]; + tensor var_6150_pad_0 = const()[name = string("op_6150_pad_0"), val = tensor([0, 0])]; + tensor var_6150_dilations_0 = const()[name = string("op_6150_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571453248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574074752))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6135_cast_fp16 = transpose(perm = var_6134, x = attn_output_51_cast_fp16)[name = string("transpose_66")]; + tensor var_6150_cast_fp16 = 
conv(dilations = var_6150_dilations_0, groups = var_6150_groups_0, pad = var_6150_pad_0, pad_type = var_6150_pad_type_0, strides = var_6150_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_6135_cast_fp16)[name = string("op_6150_cast_fp16")]; + tensor var_6154 = const()[name = string("op_6154"), val = tensor([0, 2, 1])]; + int32 var_6160 = const()[name = string("op_6160"), val = int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_171_cast_fp16 = transpose(perm = var_6154, x = var_6150_cast_fp16)[name = string("transpose_65")]; + tensor var_6162_cast_fp16 = mul(x = x_171_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_6162_cast_fp16")]; + bool input_255_interleave_0 = const()[name = string("input_255_interleave_0"), val = bool(false)]; + tensor input_255_cast_fp16 = concat(axis = var_6160, interleave = input_255_interleave_0, values = (x_171_cast_fp16, var_6162_cast_fp16))[name = string("input_255_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_6157_to_fp16 = const()[name = string("op_6157_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_6157_to_fp16, x = input_255_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor var_6167_split_sizes_0 = const()[name = string("op_6167_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6167_axis_0 = const()[name = string("op_6167_axis_0"), val = int32(-1)]; + tensor var_6167_cast_fp16_0, tensor var_6167_cast_fp16_1 = split(axis = var_6167_axis_0, split_sizes = var_6167_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_6167_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(574077376)))]; + tensor attn_output_53_cast_fp16 = mul(x = var_6167_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor x_173_cast_fp16 = add(x = x_159_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_173_cast_fp16")]; + int32 var_6176 = const()[name = string("op_6176"), val = int32(-1)]; + fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6178_cast_fp16 = mul(x = x_173_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_6178_cast_fp16")]; + bool input_257_interleave_0 = const()[name = string("input_257_interleave_0"), val = bool(false)]; + tensor input_257_cast_fp16 = concat(axis = var_6176, interleave = input_257_interleave_0, values = (x_173_cast_fp16, var_6178_cast_fp16))[name = string("input_257_cast_fp16")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_6173_to_fp16 = const()[name = string("op_6173_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_6173_to_fp16, x = input_257_cast_fp16)[name = string("normed_245_cast_fp16")]; + tensor var_6183_split_sizes_0 = const()[name = string("op_6183_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6183_axis_0 = const()[name = string("op_6183_axis_0"), val = int32(-1)]; + tensor var_6183_cast_fp16_0, tensor var_6183_cast_fp16_1 = split(axis = var_6183_axis_0, split_sizes = var_6183_split_sizes_0, x = normed_245_cast_fp16)[name = string("op_6183_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574082560)))]; + tensor h_51_cast_fp16 = mul(x = var_6183_cast_fp16_0, y 
= layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_6194 = const()[name = string("op_6194"), val = tensor([0, 2, 1])]; + tensor input_259_axes_0 = const()[name = string("input_259_axes_0"), val = tensor([2])]; + tensor var_6195 = transpose(perm = var_6194, x = h_51_cast_fp16)[name = string("transpose_64")]; + tensor input_259 = expand_dims(axes = input_259_axes_0, x = var_6195)[name = string("input_259")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_259)[name = string("gate_33")]; + string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; + tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; + tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; + int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; + tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_259)[name = string("up_17")]; + string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; + tensor input_261 = mul(x = gate_35, y = up_17)[name = string("input_261")]; + string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; + tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; + tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_261)[name = string("mlp_out_17")]; + tensor var_6235_axes_0 = const()[name = string("op_6235_axes_0"), val = tensor([2])]; + tensor var_6235 = squeeze(axes = var_6235_axes_0, x = mlp_out_17)[name = string("op_6235")]; + tensor var_6239 = const()[name = string("op_6239"), val = tensor([0, 2, 1])]; + int32 var_6245 = const()[name = string("op_6245"), val = int32(-1)]; + fp16 const_104_promoted = const()[name = string("const_104_promoted"), val = fp16(-0x1p+0)]; + tensor x_175 = transpose(perm = var_6239, x = var_6235)[name = string("transpose_63")]; + tensor var_6247 = mul(x = x_175, y = const_104_promoted)[name = string("op_6247")]; + bool input_263_interleave_0 = const()[name = string("input_263_interleave_0"), val = bool(false)]; + tensor input_263 = concat(axis = var_6245, interleave = input_263_interleave_0, values = (x_175, var_6247))[name = string("input_263")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_6242_to_fp16 = const()[name = string("op_6242_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_6242_to_fp16, x = input_263)[name = string("normed_249_cast_fp16")]; + tensor var_6252_split_sizes_0 = const()[name = string("op_6252_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6252_axis_0 = const()[name = string("op_6252_axis_0"), val = int32(-1)]; + tensor var_6252_0, tensor var_6252_1 = split(axis = var_6252_axis_0, split_sizes = var_6252_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_6252")]; + tensor hidden_states_83 = mul(x = var_6252_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_173_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor([0, 0, 2048])]; + tensor per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor([1, 3, 2304])]; + tensor per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_17_cast_fp16")]; + tensor var_6280 = const()[name = string("op_6280"), val = tensor([0, 2, 1])]; + tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; + tensor var_6281 = transpose(perm = var_6280, x = hidden_states_85_cast_fp16)[name = string("transpose_62")]; + tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_6281)[name = string("input_265")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor 
gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_265)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_6300 = const()[name = string("op_6300"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor([2])]; + tensor var_6301_cast_fp16 = transpose(perm = var_6300, x = per_layer_slice_17_cast_fp16)[name = string("transpose_61")]; + tensor per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_6301_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")]; + tensor input_267_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_267_cast_fp16")]; + string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")]; + tensor gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor([1, 1])]; + tensor gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor([1, 1])]; + int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(574087744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574415488))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_267_cast_fp16)[name = string("gated_53_cast_fp16")]; + tensor var_6317_axes_0 = const()[name = string("op_6317_axes_0"), val = tensor([2])]; + tensor var_6317_cast_fp16 = squeeze(axes = var_6317_axes_0, x = gated_53_cast_fp16)[name = string("op_6317_cast_fp16")]; + tensor var_6321 = const()[name = string("op_6321"), val = tensor([0, 2, 1])]; + int32 var_6327 = const()[name = string("op_6327"), val = int32(-1)]; + fp16 const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_177_cast_fp16 = transpose(perm = var_6321, x = var_6317_cast_fp16)[name = string("transpose_60")]; + tensor var_6329_cast_fp16 = mul(x = x_177_cast_fp16, y = const_105_promoted_to_fp16)[name = string("op_6329_cast_fp16")]; + bool input_269_interleave_0 = const()[name = string("input_269_interleave_0"), val = bool(false)]; + tensor input_269_cast_fp16 = concat(axis = var_6327, interleave = input_269_interleave_0, values = (x_177_cast_fp16, var_6329_cast_fp16))[name = string("input_269_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_6324_to_fp16 = const()[name = string("op_6324_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_6324_to_fp16, x = input_269_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor var_6334_split_sizes_0 = const()[name = string("op_6334_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_6334_axis_0 = const()[name = string("op_6334_axis_0"), val = int32(-1)]; + tensor var_6334_cast_fp16_0, tensor var_6334_cast_fp16_1 = split(axis = var_6334_axis_0, split_sizes = var_6334_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_6334_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574418112)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_6334_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; + tensor const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = tensor([0x1.d4p-3])]; + tensor x_179_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_106_promoted_to_fp16)[name = string("x_179_cast_fp16")]; + int32 var_6349 = const()[name = string("op_6349"), val = int32(-1)]; + fp16 const_107_promoted_to_fp16 = const()[name = string("const_107_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6351_cast_fp16 = mul(x = x_179_cast_fp16, y = const_107_promoted_to_fp16)[name = string("op_6351_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_6349, interleave = input_271_interleave_0, values = (x_179_cast_fp16, var_6351_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_6346_to_fp16 = const()[name = string("op_6346_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = 
var_6346_to_fp16, x = input_271_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor var_6356_split_sizes_0 = const()[name = string("op_6356_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6356_axis_0 = const()[name = string("op_6356_axis_0"), val = int32(-1)]; + tensor var_6356_cast_fp16_0, tensor var_6356_cast_fp16_1 = split(axis = var_6356_axis_0, split_sizes = var_6356_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_6356_cast_fp16")]; + tensor layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(574423296)))]; + tensor h_55_cast_fp16 = mul(x = var_6356_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")]; + tensor var_6362 = const()[name = string("op_6362"), val = tensor([0, 2, 1])]; + tensor var_6365_axes_0 = const()[name = string("op_6365_axes_0"), val = tensor([2])]; + tensor var_6363_cast_fp16 = transpose(perm = var_6362, x = h_55_cast_fp16)[name = string("transpose_59")]; + tensor var_6365_cast_fp16 = expand_dims(axes = var_6365_axes_0, x = var_6363_cast_fp16)[name = string("op_6365_cast_fp16")]; + string q_109_pad_type_0 = const()[name = string("q_109_pad_type_0"), val = string("valid")]; + tensor q_109_strides_0 = const()[name = string("q_109_strides_0"), val = tensor([1, 1])]; + tensor q_109_pad_0 = const()[name = string("q_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_109_dilations_0 = const()[name = string("q_109_dilations_0"), val = tensor([1, 1])]; + int32 q_109_groups_0 = const()[name = string("q_109_groups_0"), val = int32(1)]; + tensor q_109 = conv(dilations = q_109_dilations_0, groups = q_109_groups_0, pad = q_109_pad_0, pad_type = q_109_pad_type_0, strides = q_109_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_6365_cast_fp16)[name = string("q_109")]; + tensor var_6386 = 
const()[name = string("op_6386"), val = tensor([1, 8, 256, 3])]; + tensor var_6387 = reshape(shape = var_6386, x = q_109)[name = string("op_6387")]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_6410 = const()[name = string("op_6410"), val = tensor([3, 8, 256])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = var_6387)[name = string("transpose_58")]; + tensor x_181 = reshape(shape = var_6410, x = transpose_75)[name = string("x_181")]; + int32 var_6416 = const()[name = string("op_6416"), val = int32(-1)]; + fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)]; + tensor var_6418 = mul(x = x_181, y = const_108_promoted)[name = string("op_6418")]; + bool input_275_interleave_0 = const()[name = string("input_275_interleave_0"), val = bool(false)]; + tensor input_275 = concat(axis = var_6416, interleave = input_275_interleave_0, values = (x_181, var_6418))[name = string("input_275")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_6413_to_fp16 = const()[name = string("op_6413_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_6413_to_fp16, x = input_275)[name = string("normed_261_cast_fp16")]; + tensor var_6423_split_sizes_0 = const()[name = string("op_6423_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6423_axis_0 = const()[name = string("op_6423_axis_0"), val = int32(-1)]; + tensor var_6423_0, tensor var_6423_1 = split(axis = var_6423_axis_0, split_sizes = var_6423_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_6423")]; + tensor q_113 = mul(x = var_6423_0, y = layers_9_self_attn_q_norm_weight)[name = string("q_113")]; + tensor var_6430 = const()[name = string("op_6430"), val = tensor([1, 3, 8, 256])]; + tensor var_6431 = reshape(shape = var_6430, x = q_113)[name = string("op_6431")]; + tensor 
var_6436 = const()[name = string("op_6436"), val = tensor([0, 2, 1, 3])]; + tensor q_115 = transpose(perm = var_6436, x = var_6431)[name = string("transpose_57")]; + tensor var_6438_cast_fp16 = mul(x = q_115, y = cos_s)[name = string("op_6438_cast_fp16")]; + tensor var_6439_split_sizes_0 = const()[name = string("op_6439_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6439_axis_0 = const()[name = string("op_6439_axis_0"), val = int32(-1)]; + tensor var_6439_0, tensor var_6439_1 = split(axis = var_6439_axis_0, split_sizes = var_6439_split_sizes_0, x = q_115)[name = string("op_6439")]; + fp16 const_109_promoted = const()[name = string("const_109_promoted"), val = fp16(-0x1p+0)]; + tensor var_6441 = mul(x = var_6439_1, y = const_109_promoted)[name = string("op_6441")]; + int32 var_6443 = const()[name = string("op_6443"), val = int32(-1)]; + bool var_6444_interleave_0 = const()[name = string("op_6444_interleave_0"), val = bool(false)]; + tensor var_6444 = concat(axis = var_6443, interleave = var_6444_interleave_0, values = (var_6441, var_6439_0))[name = string("op_6444")]; + tensor var_6445_cast_fp16 = mul(x = var_6444, y = sin_s)[name = string("op_6445_cast_fp16")]; + tensor q_119_cast_fp16 = add(x = var_6438_cast_fp16, y = var_6445_cast_fp16)[name = string("q_119_cast_fp16")]; + string k_57_pad_type_0 = const()[name = string("k_57_pad_type_0"), val = string("valid")]; + tensor k_57_strides_0 = const()[name = string("k_57_strides_0"), val = tensor([1, 1])]; + tensor k_57_pad_0 = const()[name = string("k_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_57_dilations_0 = const()[name = string("k_57_dilations_0"), val = tensor([1, 1])]; + int32 k_57_groups_0 = const()[name = string("k_57_groups_0"), val = int32(1)]; + tensor k_57 = conv(dilations = k_57_dilations_0, groups = k_57_groups_0, pad = k_57_pad_0, pad_type = k_57_pad_type_0, strides = k_57_strides_0, weight = layers_9_self_attn_k_proj_weight_palettized, x = var_6365_cast_fp16)[name = string("k_57")]; + 
tensor var_6463 = const()[name = string("op_6463"), val = tensor([1, 2, 256, 3])]; + tensor var_6464 = reshape(shape = var_6463, x = k_57)[name = string("op_6464")]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_21_pad_type_0 = const()[name = string("v_21_pad_type_0"), val = string("valid")]; + tensor v_21_strides_0 = const()[name = string("v_21_strides_0"), val = tensor([1, 1])]; + tensor v_21_pad_0 = const()[name = string("v_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_21_dilations_0 = const()[name = string("v_21_dilations_0"), val = tensor([1, 1])]; + int32 v_21_groups_0 = const()[name = string("v_21_groups_0"), val = int32(1)]; + tensor v_21 = conv(dilations = v_21_dilations_0, groups = v_21_groups_0, pad = v_21_pad_0, pad_type = v_21_pad_type_0, strides = v_21_strides_0, weight = layers_9_self_attn_v_proj_weight_palettized, x = var_6365_cast_fp16)[name = string("v_21")]; + tensor var_6491 = const()[name = string("op_6491"), val = tensor([1, 2, 256, 3])]; + tensor var_6492 = reshape(shape = var_6491, x = v_21)[name = string("op_6492")]; + tensor var_6497 = const()[name = string("op_6497"), val = tensor([0, 1, 3, 2])]; + tensor var_6515 = const()[name = string("op_6515"), val = tensor([3, 2, 256])]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = var_6464)[name = string("transpose_56")]; + tensor x_183 = reshape(shape = var_6515, x = transpose_76)[name = string("x_183")]; + int32 var_6521 = const()[name = string("op_6521"), val = int32(-1)]; + fp16 const_110_promoted = const()[name = string("const_110_promoted"), val = fp16(-0x1p+0)]; + tensor var_6523 = mul(x = x_183, y = const_110_promoted)[name = string("op_6523")]; + bool input_277_interleave_0 = const()[name = string("input_277_interleave_0"), val = bool(false)]; + tensor input_277 = concat(axis = var_6521, interleave = input_277_interleave_0, values = (x_183, var_6523))[name = string("input_277")]; + tensor 
normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_6518_to_fp16 = const()[name = string("op_6518_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_6518_to_fp16, x = input_277)[name = string("normed_265_cast_fp16")]; + tensor var_6528_split_sizes_0 = const()[name = string("op_6528_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6528_axis_0 = const()[name = string("op_6528_axis_0"), val = int32(-1)]; + tensor var_6528_0, tensor var_6528_1 = split(axis = var_6528_axis_0, split_sizes = var_6528_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_6528")]; + tensor k_61 = mul(x = var_6528_0, y = layers_9_self_attn_k_norm_weight)[name = string("k_61")]; + tensor var_6535 = const()[name = string("op_6535"), val = tensor([1, 3, 2, 256])]; + tensor var_6536 = reshape(shape = var_6535, x = k_61)[name = string("op_6536")]; + tensor var_6541 = const()[name = string("op_6541"), val = tensor([0, 2, 1, 3])]; + fp16 var_6543_promoted = const()[name = string("op_6543_promoted"), val = fp16(0x1p+1)]; + tensor var_6498 = transpose(perm = var_6497, x = var_6492)[name = string("transpose_55")]; + tensor var_6544 = pow(x = var_6498, y = var_6543_promoted)[name = string("op_6544")]; + tensor var_6549_axes_0 = const()[name = string("op_6549_axes_0"), val = tensor([-1])]; + bool var_6549_keep_dims_0 = const()[name = string("op_6549_keep_dims_0"), val = bool(true)]; + tensor var_6549 = reduce_mean(axes = var_6549_axes_0, keep_dims = var_6549_keep_dims_0, x = var_6544)[name = string("op_6549")]; + fp16 var_6551_to_fp16 = const()[name = string("op_6551_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_19_cast_fp16 = add(x = var_6549, y = var_6551_to_fp16)[name = string("mean_sq_19_cast_fp16")]; + fp32 var_6553_epsilon_0 = const()[name = string("op_6553_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6553_cast_fp16 = rsqrt(epsilon = var_6553_epsilon_0, x = 
mean_sq_19_cast_fp16)[name = string("op_6553_cast_fp16")]; + tensor input_281_cast_fp16 = mul(x = var_6498, y = var_6553_cast_fp16)[name = string("input_281_cast_fp16")]; + tensor q_117 = transpose(perm = var_6541, x = var_6536)[name = string("transpose_54")]; + tensor var_6555_cast_fp16 = mul(x = q_117, y = cos_s)[name = string("op_6555_cast_fp16")]; + tensor var_6556_split_sizes_0 = const()[name = string("op_6556_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6556_axis_0 = const()[name = string("op_6556_axis_0"), val = int32(-1)]; + tensor var_6556_0, tensor var_6556_1 = split(axis = var_6556_axis_0, split_sizes = var_6556_split_sizes_0, x = q_117)[name = string("op_6556")]; + fp16 const_111_promoted = const()[name = string("const_111_promoted"), val = fp16(-0x1p+0)]; + tensor var_6558 = mul(x = var_6556_1, y = const_111_promoted)[name = string("op_6558")]; + int32 var_6560 = const()[name = string("op_6560"), val = int32(-1)]; + bool var_6561_interleave_0 = const()[name = string("op_6561_interleave_0"), val = bool(false)]; + tensor var_6561 = concat(axis = var_6560, interleave = var_6561_interleave_0, values = (var_6558, var_6556_0))[name = string("op_6561")]; + tensor var_6562_cast_fp16 = mul(x = var_6561, y = sin_s)[name = string("op_6562_cast_fp16")]; + tensor input_279_cast_fp16 = add(x = var_6555_cast_fp16, y = var_6562_cast_fp16)[name = string("input_279_cast_fp16")]; + tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; + fp16 const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_17_cast_fp16 = pad(constant_val = const_112_to_fp16, mode = k_padded_17_mode_0, pad = k_padded_17_pad_0, x = input_279_cast_fp16)[name = string("k_padded_17_cast_fp16")]; + tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 
0, 0, 0, 0, 0, 256])]; + string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; + fp16 const_113_to_fp16 = const()[name = string("const_113_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_17_cast_fp16 = pad(constant_val = const_113_to_fp16, mode = v_padded_17_mode_0, pad = v_padded_17_pad_0, x = input_281_cast_fp16)[name = string("v_padded_17_cast_fp16")]; + tensor slot_k_19_begin_0 = const()[name = string("slot_k_19_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor slot_k_19_end_0 = const()[name = string("slot_k_19_end_0"), val = tensor([9, 2, 512, 512])]; + tensor slot_k_19_end_mask_0 = const()[name = string("slot_k_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_19_cast_fp16 = slice_by_index(begin = slot_k_19_begin_0, end = slot_k_19_end_0, end_mask = slot_k_19_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("slot_k_19_cast_fp16")]; + tensor slot_v_19_begin_0 = const()[name = string("slot_v_19_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor slot_v_19_end_0 = const()[name = string("slot_v_19_end_0"), val = tensor([9, 2, 512, 512])]; + tensor slot_v_19_end_mask_0 = const()[name = string("slot_v_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_19_cast_fp16 = slice_by_index(begin = slot_v_19_begin_0, end = slot_v_19_end_0, end_mask = slot_v_19_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("slot_v_19_cast_fp16")]; + tensor var_6601_begin_0 = const()[name = string("op_6601_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_6601_end_0 = const()[name = string("op_6601_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6601_end_mask_0 = const()[name = string("op_6601_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6601_cast_fp16 = slice_by_index(begin = var_6601_begin_0, end = var_6601_end_0, end_mask = var_6601_end_mask_0, x = slot_k_19_cast_fp16)[name = string("op_6601_cast_fp16")]; + int32 var_6608 = const()[name = 
string("op_6608"), val = int32(2)]; + bool new_k_19_interleave_0 = const()[name = string("new_k_19_interleave_0"), val = bool(false)]; + tensor new_k_19_cast_fp16 = concat(axis = var_6608, interleave = new_k_19_interleave_0, values = (var_6601_cast_fp16, k_padded_17_cast_fp16))[name = string("new_k_19_cast_fp16")]; + tensor var_6624_begin_0 = const()[name = string("op_6624_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_6624_end_0 = const()[name = string("op_6624_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6624_end_mask_0 = const()[name = string("op_6624_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6624_cast_fp16 = slice_by_index(begin = var_6624_begin_0, end = var_6624_end_0, end_mask = var_6624_end_mask_0, x = slot_v_19_cast_fp16)[name = string("op_6624_cast_fp16")]; + int32 var_6631 = const()[name = string("op_6631"), val = int32(2)]; + bool new_v_19_interleave_0 = const()[name = string("new_v_19_interleave_0"), val = bool(false)]; + tensor new_v_19_cast_fp16 = concat(axis = var_6631, interleave = new_v_19_interleave_0, values = (var_6624_cast_fp16, v_padded_17_cast_fp16))[name = string("new_v_19_cast_fp16")]; + tensor var_6637_begin_0 = const()[name = string("op_6637_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6637_end_0 = const()[name = string("op_6637_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_6637_end_mask_0 = const()[name = string("op_6637_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6637_cast_fp16 = slice_by_index(begin = var_6637_begin_0, end = var_6637_end_0, end_mask = var_6637_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("op_6637_cast_fp16")]; + tensor var_6642_begin_0 = const()[name = string("op_6642_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6642_end_0 = const()[name = string("op_6642_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6642_end_mask_0 = const()[name = string("op_6642_end_mask_0"), val = tensor([true, true, true, true])]; + 
tensor var_6642_cast_fp16 = slice_by_index(begin = var_6642_begin_0, end = var_6642_end_0, end_mask = var_6642_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("op_6642_cast_fp16")]; + int32 var_6644 = const()[name = string("op_6644"), val = int32(0)]; + bool K_sliding_out_17_interleave_0 = const()[name = string("K_sliding_out_17_interleave_0"), val = bool(false)]; + tensor K_sliding_out_17_cast_fp16 = concat(axis = var_6644, interleave = K_sliding_out_17_interleave_0, values = (var_6637_cast_fp16, new_k_19_cast_fp16, var_6642_cast_fp16))[name = string("K_sliding_out_17_cast_fp16")]; + tensor var_6650_begin_0 = const()[name = string("op_6650_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6650_end_0 = const()[name = string("op_6650_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_6650_end_mask_0 = const()[name = string("op_6650_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6650_cast_fp16 = slice_by_index(begin = var_6650_begin_0, end = var_6650_end_0, end_mask = var_6650_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("op_6650_cast_fp16")]; + tensor var_6655_begin_0 = const()[name = string("op_6655_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6655_end_0 = const()[name = string("op_6655_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6655_end_mask_0 = const()[name = string("op_6655_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6655_cast_fp16 = slice_by_index(begin = var_6655_begin_0, end = var_6655_end_0, end_mask = var_6655_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("op_6655_cast_fp16")]; + int32 var_6657 = const()[name = string("op_6657"), val = int32(0)]; + bool V_sliding_out_17_interleave_0 = const()[name = string("V_sliding_out_17_interleave_0"), val = bool(false)]; + tensor V_sliding_out_17_cast_fp16 = concat(axis = var_6657, interleave = V_sliding_out_17_interleave_0, values = (var_6650_cast_fp16, new_v_19_cast_fp16, var_6655_cast_fp16))[name = 
string("V_sliding_out_17_cast_fp16")]; + tensor var_6663_begin_0 = const()[name = string("op_6663_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6663_end_0 = const()[name = string("op_6663_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_6663_end_mask_0 = const()[name = string("op_6663_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6663_cast_fp16 = slice_by_index(begin = var_6663_begin_0, end = var_6663_end_0, end_mask = var_6663_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("op_6663_cast_fp16")]; + tensor K_for_attn_19_begin_0 = const()[name = string("K_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_19_end_0 = const()[name = string("K_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_19_end_mask_0 = const()[name = string("K_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_19_cast_fp16 = slice_by_index(begin = K_for_attn_19_begin_0, end = K_for_attn_19_end_0, end_mask = K_for_attn_19_end_mask_0, x = var_6663_cast_fp16)[name = string("K_for_attn_19_cast_fp16")]; + tensor var_6673_begin_0 = const()[name = string("op_6673_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6673_end_0 = const()[name = string("op_6673_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_6673_end_mask_0 = const()[name = string("op_6673_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6673_cast_fp16 = slice_by_index(begin = var_6673_begin_0, end = var_6673_end_0, end_mask = var_6673_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("op_6673_cast_fp16")]; + tensor V_for_attn_19_begin_0 = const()[name = string("V_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_19_end_0 = const()[name = string("V_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_19_end_mask_0 = const()[name = string("V_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
V_for_attn_19_cast_fp16 = slice_by_index(begin = V_for_attn_19_begin_0, end = V_for_attn_19_end_0, end_mask = V_for_attn_19_end_mask_0, x = var_6673_cast_fp16)[name = string("V_for_attn_19_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_18_reps_0 = const()[name = string("tile_18_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = K_for_attn_19_cast_fp16)[name = string("transpose_53")]; + tensor tile_18_cast_fp16 = tile(reps = tile_18_reps_0, x = transpose_36_cast_fp16)[name = string("tile_18_cast_fp16")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_38, x = tile_18_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_36_cast_fp16)[name = string("transpose_52")]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_39, x = transpose_37_cast_fp16)[name = string("reshape_37_cast_fp16")]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_19_reps_0 = const()[name = string("tile_19_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = V_for_attn_19_cast_fp16)[name = string("transpose_51")]; + tensor tile_19_cast_fp16 = tile(reps = tile_19_reps_0, x = transpose_38_cast_fp16)[name = string("tile_19_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([4, 2, 1, 512, 256])]; + tensor 
reshape_38_cast_fp16 = reshape(shape = concat_40, x = tile_19_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_39_cast_fp16 = transpose(perm = transpose_39_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_50")]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_41, x = transpose_39_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor V_expanded_19_perm_0 = const()[name = string("V_expanded_19_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)]; + bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)]; + tensor transpose_77_cast_fp16 = transpose(perm = transpose_77_perm_0, x = reshape_37_cast_fp16)[name = string("transpose_49")]; + tensor attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_119_cast_fp16, y = transpose_77_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_sliding)[name = string("x_187_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_187_cast_fp16)[name = string("reduce_max_9")]; + tensor var_6708 = sub(x = x_187_cast_fp16, y = reduce_max_9)[name = string("op_6708")]; + tensor var_6714 = exp(x = var_6708)[name = string("op_6714")]; + tensor var_6724_axes_0 = const()[name = string("op_6724_axes_0"), val = tensor([-1])]; + bool 
var_6724_keep_dims_0 = const()[name = string("op_6724_keep_dims_0"), val = bool(true)]; + tensor var_6724 = reduce_sum(axes = var_6724_axes_0, keep_dims = var_6724_keep_dims_0, x = var_6714)[name = string("op_6724")]; + tensor var_6730_cast_fp16 = real_div(x = var_6714, y = var_6724)[name = string("op_6730_cast_fp16")]; + bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; + bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; + tensor V_expanded_19_cast_fp16 = transpose(perm = V_expanded_19_perm_0, x = reshape_39_cast_fp16)[name = string("transpose_48")]; + tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_6730_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_6741 = const()[name = string("op_6741"), val = tensor([0, 2, 1, 3])]; + tensor var_6748 = const()[name = string("op_6748"), val = tensor([1, 3, -1])]; + tensor var_6742_cast_fp16 = transpose(perm = var_6741, x = attn_output_55_cast_fp16)[name = string("transpose_47")]; + tensor attn_output_57_cast_fp16 = reshape(shape = var_6748, x = var_6742_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_6753 = const()[name = string("op_6753"), val = tensor([0, 2, 1])]; + string var_6769_pad_type_0 = const()[name = string("op_6769_pad_type_0"), val = string("valid")]; + int32 var_6769_groups_0 = const()[name = string("op_6769_groups_0"), val = int32(1)]; + tensor var_6769_strides_0 = const()[name = string("op_6769_strides_0"), val = tensor([1])]; + tensor var_6769_pad_0 = const()[name = string("op_6769_pad_0"), val = tensor([0, 0])]; + tensor var_6769_dilations_0 = const()[name = string("op_6769_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(574428480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577049984))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6754_cast_fp16 = transpose(perm = var_6753, x = attn_output_57_cast_fp16)[name = string("transpose_46")]; + tensor var_6769_cast_fp16 = conv(dilations = var_6769_dilations_0, groups = var_6769_groups_0, pad = var_6769_pad_0, pad_type = var_6769_pad_type_0, strides = var_6769_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6754_cast_fp16)[name = string("op_6769_cast_fp16")]; + tensor var_6773 = const()[name = string("op_6773"), val = tensor([0, 2, 1])]; + int32 var_6779 = const()[name = string("op_6779"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_191_cast_fp16 = transpose(perm = var_6773, x = var_6769_cast_fp16)[name = string("transpose_45")]; + tensor var_6781_cast_fp16 = mul(x = x_191_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_6781_cast_fp16")]; + bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; + tensor input_285_cast_fp16 = concat(axis = var_6779, interleave = input_285_interleave_0, values = (x_191_cast_fp16, var_6781_cast_fp16))[name = string("input_285_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_6776_to_fp16 = const()[name = string("op_6776_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_6776_to_fp16, x = input_285_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor var_6786_split_sizes_0 = const()[name = string("op_6786_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6786_axis_0 = const()[name = string("op_6786_axis_0"), val = int32(-1)]; + tensor 
var_6786_cast_fp16_0, tensor var_6786_cast_fp16_1 = split(axis = var_6786_axis_0, split_sizes = var_6786_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_6786_cast_fp16")]; + tensor layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577052608)))]; + tensor attn_output_59_cast_fp16 = mul(x = var_6786_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor x_193_cast_fp16 = add(x = x_179_cast_fp16, y = attn_output_59_cast_fp16)[name = string("x_193_cast_fp16")]; + int32 var_6795 = const()[name = string("op_6795"), val = int32(-1)]; + fp16 const_115_promoted_to_fp16 = const()[name = string("const_115_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6797_cast_fp16 = mul(x = x_193_cast_fp16, y = const_115_promoted_to_fp16)[name = string("op_6797_cast_fp16")]; + bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; + tensor input_287_cast_fp16 = concat(axis = var_6795, interleave = input_287_interleave_0, values = (x_193_cast_fp16, var_6797_cast_fp16))[name = string("input_287_cast_fp16")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_6792_to_fp16 = const()[name = string("op_6792_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_6792_to_fp16, x = input_287_cast_fp16)[name = string("normed_273_cast_fp16")]; + tensor var_6802_split_sizes_0 = const()[name = string("op_6802_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6802_axis_0 = const()[name = string("op_6802_axis_0"), val = int32(-1)]; + tensor var_6802_cast_fp16_0, tensor var_6802_cast_fp16_1 = split(axis = var_6802_axis_0, split_sizes = var_6802_split_sizes_0, x = 
normed_273_cast_fp16)[name = string("op_6802_cast_fp16")]; + tensor layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577057792)))]; + tensor h_57_cast_fp16 = mul(x = var_6802_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")]; + tensor var_6813 = const()[name = string("op_6813"), val = tensor([0, 2, 1])]; + tensor input_289_axes_0 = const()[name = string("input_289_axes_0"), val = tensor([2])]; + tensor var_6814 = transpose(perm = var_6813, x = h_57_cast_fp16)[name = string("transpose_44")]; + tensor input_289 = expand_dims(axes = input_289_axes_0, x = var_6814)[name = string("input_289")]; + string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; + tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; + tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; + int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; + tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_289)[name = string("gate_37")]; + string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; + tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; + tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; + int32 up_19_groups_0 = const()[name = 
string("up_19_groups_0"), val = int32(1)]; + tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_289)[name = string("up_19")]; + string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; + tensor input_291 = mul(x = gate_39, y = up_19)[name = string("input_291")]; + string mlp_out_19_pad_type_0 = const()[name = string("mlp_out_19_pad_type_0"), val = string("valid")]; + tensor mlp_out_19_strides_0 = const()[name = string("mlp_out_19_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_19_pad_0 = const()[name = string("mlp_out_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_19_dilations_0 = const()[name = string("mlp_out_19_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_19_groups_0 = const()[name = string("mlp_out_19_groups_0"), val = int32(1)]; + tensor mlp_out_19 = conv(dilations = mlp_out_19_dilations_0, groups = mlp_out_19_groups_0, pad = mlp_out_19_pad_0, pad_type = mlp_out_19_pad_type_0, strides = mlp_out_19_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_291)[name = string("mlp_out_19")]; + tensor var_6854_axes_0 = const()[name = string("op_6854_axes_0"), val = tensor([2])]; + tensor var_6854 = squeeze(axes = var_6854_axes_0, x = mlp_out_19)[name = string("op_6854")]; + tensor var_6858 = const()[name = string("op_6858"), val = tensor([0, 2, 1])]; + int32 var_6864 = const()[name = string("op_6864"), val = int32(-1)]; + fp16 const_116_promoted = const()[name = string("const_116_promoted"), val = fp16(-0x1p+0)]; + tensor x_195 = transpose(perm = var_6858, x = var_6854)[name = string("transpose_43")]; + tensor var_6866 = mul(x = x_195, y = const_116_promoted)[name = string("op_6866")]; + bool input_293_interleave_0 = const()[name = 
string("input_293_interleave_0"), val = bool(false)]; + tensor input_293 = concat(axis = var_6864, interleave = input_293_interleave_0, values = (x_195, var_6866))[name = string("input_293")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_6861_to_fp16 = const()[name = string("op_6861_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_6861_to_fp16, x = input_293)[name = string("normed_277_cast_fp16")]; + tensor var_6871_split_sizes_0 = const()[name = string("op_6871_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6871_axis_0 = const()[name = string("op_6871_axis_0"), val = int32(-1)]; + tensor var_6871_0, tensor var_6871_1 = split(axis = var_6871_axis_0, split_sizes = var_6871_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_6871")]; + tensor hidden_states_93 = mul(x = var_6871_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")]; + tensor hidden_states_95_cast_fp16 = add(x = x_193_cast_fp16, y = hidden_states_93)[name = string("hidden_states_95_cast_fp16")]; + tensor per_layer_slice_19_begin_0 = const()[name = string("per_layer_slice_19_begin_0"), val = tensor([0, 0, 2304])]; + tensor per_layer_slice_19_end_0 = const()[name = string("per_layer_slice_19_end_0"), val = tensor([1, 3, 2560])]; + tensor per_layer_slice_19_end_mask_0 = const()[name = string("per_layer_slice_19_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_19_cast_fp16 = slice_by_index(begin = per_layer_slice_19_begin_0, end = per_layer_slice_19_end_0, end_mask = per_layer_slice_19_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_19_cast_fp16")]; + tensor var_6899 = const()[name = string("op_6899"), val = tensor([0, 2, 1])]; + tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; + tensor var_6900 = transpose(perm = var_6899, x = 
hidden_states_95_cast_fp16)[name = string("transpose_42")]; + tensor input_295 = expand_dims(axes = input_295_axes_0, x = var_6900)[name = string("input_295")]; + string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")]; + tensor gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor([1, 1])]; + tensor gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor([1, 1])]; + int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)]; + tensor gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_295)[name = string("gated_55")]; + string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")]; + tensor var_6919 = const()[name = string("op_6919"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_19_axes_0 = const()[name = string("per_layer_slice_conv_19_axes_0"), val = tensor([2])]; + tensor var_6920_cast_fp16 = transpose(perm = var_6919, x = per_layer_slice_19_cast_fp16)[name = string("transpose_41")]; + tensor per_layer_slice_conv_19_cast_fp16 = expand_dims(axes = per_layer_slice_conv_19_axes_0, x = var_6920_cast_fp16)[name = string("per_layer_slice_conv_19_cast_fp16")]; + tensor input_297_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_19_cast_fp16)[name = string("input_297_cast_fp16")]; + string gated_59_pad_type_0 = const()[name = string("gated_59_pad_type_0"), val = string("valid")]; + tensor gated_59_strides_0 = const()[name = string("gated_59_strides_0"), val = tensor([1, 1])]; + tensor gated_59_pad_0 = const()[name = 
string("gated_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_59_dilations_0 = const()[name = string("gated_59_dilations_0"), val = tensor([1, 1])]; + int32 gated_59_groups_0 = const()[name = string("gated_59_groups_0"), val = int32(1)]; + tensor layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577062976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577390720))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_59_cast_fp16 = conv(dilations = gated_59_dilations_0, groups = gated_59_groups_0, pad = gated_59_pad_0, pad_type = gated_59_pad_type_0, strides = gated_59_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("gated_59_cast_fp16")]; + tensor var_6936_axes_0 = const()[name = string("op_6936_axes_0"), val = tensor([2])]; + tensor var_6936_cast_fp16 = squeeze(axes = var_6936_axes_0, x = gated_59_cast_fp16)[name = string("op_6936_cast_fp16")]; + tensor var_6940 = const()[name = string("op_6940"), val = tensor([0, 2, 1])]; + int32 var_6946 = const()[name = string("op_6946"), val = int32(-1)]; + fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_197_cast_fp16 = transpose(perm = var_6940, x = var_6936_cast_fp16)[name = string("transpose_40")]; + tensor var_6948_cast_fp16 = mul(x = x_197_cast_fp16, y = const_117_promoted_to_fp16)[name = string("op_6948_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_6946, interleave = input_299_interleave_0, values = (x_197_cast_fp16, var_6948_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = 
string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_6943_to_fp16 = const()[name = string("op_6943_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_6943_to_fp16, x = input_299_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor var_6953_split_sizes_0 = const()[name = string("op_6953_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6953_axis_0 = const()[name = string("op_6953_axis_0"), val = int32(-1)]; + tensor var_6953_cast_fp16_0, tensor var_6953_cast_fp16_1 = split(axis = var_6953_axis_0, split_sizes = var_6953_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_6953_cast_fp16")]; + tensor layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577393344)))]; + tensor hidden_states_99_cast_fp16 = mul(x = var_6953_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = tensor([0x1.a8p-2])]; + tensor x_199_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_118_promoted_to_fp16)[name = string("x_199_cast_fp16")]; + int32 var_6968 = const()[name = string("op_6968"), val = int32(-1)]; + fp16 const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6970_cast_fp16 = mul(x = x_199_cast_fp16, y = const_119_promoted_to_fp16)[name = string("op_6970_cast_fp16")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301_cast_fp16 = concat(axis = var_6968, 
interleave = input_301_interleave_0, values = (x_199_cast_fp16, var_6970_cast_fp16))[name = string("input_301_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_6965_to_fp16 = const()[name = string("op_6965_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_6965_to_fp16, x = input_301_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor var_6975_split_sizes_0 = const()[name = string("op_6975_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6975_axis_0 = const()[name = string("op_6975_axis_0"), val = int32(-1)]; + tensor var_6975_cast_fp16_0, tensor var_6975_cast_fp16_1 = split(axis = var_6975_axis_0, split_sizes = var_6975_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_6975_cast_fp16")]; + tensor layers_10_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577398528)))]; + tensor h_61_cast_fp16 = mul(x = var_6975_cast_fp16_0, y = layers_10_input_layernorm_weight_promoted_to_fp16)[name = string("h_61_cast_fp16")]; + tensor var_6981 = const()[name = string("op_6981"), val = tensor([0, 2, 1])]; + tensor var_6984_axes_0 = const()[name = string("op_6984_axes_0"), val = tensor([2])]; + tensor var_6982_cast_fp16 = transpose(perm = var_6981, x = h_61_cast_fp16)[name = string("transpose_39")]; + tensor var_6984_cast_fp16 = expand_dims(axes = var_6984_axes_0, x = var_6982_cast_fp16)[name = string("op_6984_cast_fp16")]; + string q_121_pad_type_0 = const()[name = string("q_121_pad_type_0"), val = string("valid")]; + tensor q_121_strides_0 = const()[name = string("q_121_strides_0"), val = tensor([1, 1])]; + tensor q_121_pad_0 = const()[name = string("q_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_121_dilations_0 = const()[name = string("q_121_dilations_0"), 
val = tensor([1, 1])]; + int32 q_121_groups_0 = const()[name = string("q_121_groups_0"), val = int32(1)]; + tensor q_121 = conv(dilations = q_121_dilations_0, groups = q_121_groups_0, pad = q_121_pad_0, pad_type = q_121_pad_type_0, strides = q_121_strides_0, weight = layers_10_self_attn_q_proj_weight_palettized, x = var_6984_cast_fp16)[name = string("q_121")]; + tensor var_7005 = const()[name = string("op_7005"), val = tensor([1, 8, 256, 3])]; + tensor var_7006 = reshape(shape = var_7005, x = q_121)[name = string("op_7006")]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_7029 = const()[name = string("op_7029"), val = tensor([3, 8, 256])]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = var_7006)[name = string("transpose_38")]; + tensor x_201 = reshape(shape = var_7029, x = transpose_78)[name = string("x_201")]; + int32 var_7035 = const()[name = string("op_7035"), val = int32(-1)]; + fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; + tensor var_7037 = mul(x = x_201, y = const_120_promoted)[name = string("op_7037")]; + bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; + tensor input_305 = concat(axis = var_7035, interleave = input_305_interleave_0, values = (x_201, var_7037))[name = string("input_305")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_7032_to_fp16 = const()[name = string("op_7032_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_7032_to_fp16, x = input_305)[name = string("normed_289_cast_fp16")]; + tensor var_7042_split_sizes_0 = const()[name = string("op_7042_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7042_axis_0 = const()[name = string("op_7042_axis_0"), val = int32(-1)]; + tensor var_7042_0, tensor var_7042_1 = split(axis = 
var_7042_axis_0, split_sizes = var_7042_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_7042")]; + tensor q_125 = mul(x = var_7042_0, y = layers_3_self_attn_q_norm_weight)[name = string("q_125")]; + tensor var_7049 = const()[name = string("op_7049"), val = tensor([1, 3, 8, 256])]; + tensor var_7050 = reshape(shape = var_7049, x = q_125)[name = string("op_7050")]; + tensor var_7055 = const()[name = string("op_7055"), val = tensor([0, 2, 1, 3])]; + tensor q_127 = transpose(perm = var_7055, x = var_7050)[name = string("transpose_37")]; + tensor var_7057_cast_fp16 = mul(x = q_127, y = cos_s)[name = string("op_7057_cast_fp16")]; + tensor var_7058_split_sizes_0 = const()[name = string("op_7058_split_sizes_0"), val = tensor([128, 128])]; + int32 var_7058_axis_0 = const()[name = string("op_7058_axis_0"), val = int32(-1)]; + tensor var_7058_0, tensor var_7058_1 = split(axis = var_7058_axis_0, split_sizes = var_7058_split_sizes_0, x = q_127)[name = string("op_7058")]; + fp16 const_121_promoted = const()[name = string("const_121_promoted"), val = fp16(-0x1p+0)]; + tensor var_7060 = mul(x = var_7058_1, y = const_121_promoted)[name = string("op_7060")]; + int32 var_7062 = const()[name = string("op_7062"), val = int32(-1)]; + bool var_7063_interleave_0 = const()[name = string("op_7063_interleave_0"), val = bool(false)]; + tensor var_7063 = concat(axis = var_7062, interleave = var_7063_interleave_0, values = (var_7060, var_7058_0))[name = string("op_7063")]; + tensor var_7064_cast_fp16 = mul(x = var_7063, y = sin_s)[name = string("op_7064_cast_fp16")]; + tensor q_131_cast_fp16 = add(x = var_7057_cast_fp16, y = var_7064_cast_fp16)[name = string("q_131_cast_fp16")]; + string k_63_pad_type_0 = const()[name = string("k_63_pad_type_0"), val = string("valid")]; + tensor k_63_strides_0 = const()[name = string("k_63_strides_0"), val = tensor([1, 1])]; + tensor k_63_pad_0 = const()[name = string("k_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_63_dilations_0 = 
const()[name = string("k_63_dilations_0"), val = tensor([1, 1])]; + int32 k_63_groups_0 = const()[name = string("k_63_groups_0"), val = int32(1)]; + tensor k_63 = conv(dilations = k_63_dilations_0, groups = k_63_groups_0, pad = k_63_pad_0, pad_type = k_63_pad_type_0, strides = k_63_strides_0, weight = layers_10_self_attn_k_proj_weight_palettized, x = var_6984_cast_fp16)[name = string("k_63")]; + tensor var_7082 = const()[name = string("op_7082"), val = tensor([1, 2, 256, 3])]; + tensor var_7083 = reshape(shape = var_7082, x = k_63)[name = string("op_7083")]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_23_pad_type_0 = const()[name = string("v_23_pad_type_0"), val = string("valid")]; + tensor v_23_strides_0 = const()[name = string("v_23_strides_0"), val = tensor([1, 1])]; + tensor v_23_pad_0 = const()[name = string("v_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_23_dilations_0 = const()[name = string("v_23_dilations_0"), val = tensor([1, 1])]; + int32 v_23_groups_0 = const()[name = string("v_23_groups_0"), val = int32(1)]; + tensor v_23 = conv(dilations = v_23_dilations_0, groups = v_23_groups_0, pad = v_23_pad_0, pad_type = v_23_pad_type_0, strides = v_23_strides_0, weight = layers_10_self_attn_v_proj_weight_palettized, x = var_6984_cast_fp16)[name = string("v_23")]; + tensor var_7110 = const()[name = string("op_7110"), val = tensor([1, 2, 256, 3])]; + tensor var_7111 = reshape(shape = var_7110, x = v_23)[name = string("op_7111")]; + tensor var_7116 = const()[name = string("op_7116"), val = tensor([0, 1, 3, 2])]; + tensor var_7134 = const()[name = string("op_7134"), val = tensor([3, 2, 256])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = var_7083)[name = string("transpose_36")]; + tensor x_203 = reshape(shape = var_7134, x = transpose_79)[name = string("x_203")]; + int32 var_7140 = const()[name = string("op_7140"), val = int32(-1)]; + fp16 const_122_promoted = 
const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_7142 = mul(x = x_203, y = const_122_promoted)[name = string("op_7142")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307 = concat(axis = var_7140, interleave = input_307_interleave_0, values = (x_203, var_7142))[name = string("input_307")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_7137_to_fp16 = const()[name = string("op_7137_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_7137_to_fp16, x = input_307)[name = string("normed_293_cast_fp16")]; + tensor var_7147_split_sizes_0 = const()[name = string("op_7147_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7147_axis_0 = const()[name = string("op_7147_axis_0"), val = int32(-1)]; + tensor var_7147_0, tensor var_7147_1 = split(axis = var_7147_axis_0, split_sizes = var_7147_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_7147")]; + tensor k_67 = mul(x = var_7147_0, y = layers_6_self_attn_k_norm_weight)[name = string("k_67")]; + tensor var_7154 = const()[name = string("op_7154"), val = tensor([1, 3, 2, 256])]; + tensor var_7155 = reshape(shape = var_7154, x = k_67)[name = string("op_7155")]; + tensor var_7160 = const()[name = string("op_7160"), val = tensor([0, 2, 1, 3])]; + fp16 var_7162_promoted = const()[name = string("op_7162_promoted"), val = fp16(0x1p+1)]; + tensor var_7117 = transpose(perm = var_7116, x = var_7111)[name = string("transpose_35")]; + tensor var_7163 = pow(x = var_7117, y = var_7162_promoted)[name = string("op_7163")]; + tensor var_7168_axes_0 = const()[name = string("op_7168_axes_0"), val = tensor([-1])]; + bool var_7168_keep_dims_0 = const()[name = string("op_7168_keep_dims_0"), val = bool(true)]; + tensor var_7168 = reduce_mean(axes = var_7168_axes_0, keep_dims = var_7168_keep_dims_0, x = 
var_7163)[name = string("op_7168")]; + fp16 var_7170_to_fp16 = const()[name = string("op_7170_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_21_cast_fp16 = add(x = var_7168, y = var_7170_to_fp16)[name = string("mean_sq_21_cast_fp16")]; + fp32 var_7172_epsilon_0 = const()[name = string("op_7172_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7172_cast_fp16 = rsqrt(epsilon = var_7172_epsilon_0, x = mean_sq_21_cast_fp16)[name = string("op_7172_cast_fp16")]; + tensor input_311_cast_fp16 = mul(x = var_7117, y = var_7172_cast_fp16)[name = string("input_311_cast_fp16")]; + tensor q_129 = transpose(perm = var_7160, x = var_7155)[name = string("transpose_34")]; + tensor var_7174_cast_fp16 = mul(x = q_129, y = cos_s)[name = string("op_7174_cast_fp16")]; + tensor var_7175_split_sizes_0 = const()[name = string("op_7175_split_sizes_0"), val = tensor([128, 128])]; + int32 var_7175_axis_0 = const()[name = string("op_7175_axis_0"), val = int32(-1)]; + tensor var_7175_0, tensor var_7175_1 = split(axis = var_7175_axis_0, split_sizes = var_7175_split_sizes_0, x = q_129)[name = string("op_7175")]; + fp16 const_123_promoted = const()[name = string("const_123_promoted"), val = fp16(-0x1p+0)]; + tensor var_7177 = mul(x = var_7175_1, y = const_123_promoted)[name = string("op_7177")]; + int32 var_7179 = const()[name = string("op_7179"), val = int32(-1)]; + bool var_7180_interleave_0 = const()[name = string("op_7180_interleave_0"), val = bool(false)]; + tensor var_7180 = concat(axis = var_7179, interleave = var_7180_interleave_0, values = (var_7177, var_7175_0))[name = string("op_7180")]; + tensor var_7181_cast_fp16 = mul(x = var_7180, y = sin_s)[name = string("op_7181_cast_fp16")]; + tensor input_309_cast_fp16 = add(x = var_7174_cast_fp16, y = var_7181_cast_fp16)[name = string("input_309_cast_fp16")]; + tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), 
val = string("constant")]; + fp16 const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_cast_fp16 = pad(constant_val = const_124_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_309_cast_fp16)[name = string("k_padded_cast_fp16")]; + tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; + fp16 const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_cast_fp16 = pad(constant_val = const_125_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = input_311_cast_fp16)[name = string("v_padded_cast_fp16")]; + tensor slot_k_21_begin_0 = const()[name = string("slot_k_21_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor slot_k_21_end_0 = const()[name = string("slot_k_21_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_k_21_end_mask_0 = const()[name = string("slot_k_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_k_21_cast_fp16 = slice_by_index(begin = slot_k_21_begin_0, end = slot_k_21_end_0, end_mask = slot_k_21_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("slot_k_21_cast_fp16")]; + tensor slot_v_21_begin_0 = const()[name = string("slot_v_21_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor slot_v_21_end_0 = const()[name = string("slot_v_21_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_v_21_end_mask_0 = const()[name = string("slot_v_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_v_21_cast_fp16 = slice_by_index(begin = slot_v_21_begin_0, end = slot_v_21_end_0, end_mask = slot_v_21_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("slot_v_21_cast_fp16")]; + tensor var_7220_begin_0 = const()[name = string("op_7220_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_7220_end_0 = const()[name = string("op_7220_end_0"), val = 
tensor([1, 2, 512, 512])]; + tensor var_7220_end_mask_0 = const()[name = string("op_7220_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7220_cast_fp16 = slice_by_index(begin = var_7220_begin_0, end = var_7220_end_0, end_mask = var_7220_end_mask_0, x = slot_k_21_cast_fp16)[name = string("op_7220_cast_fp16")]; + int32 var_7227 = const()[name = string("op_7227"), val = int32(2)]; + bool new_k_21_interleave_0 = const()[name = string("new_k_21_interleave_0"), val = bool(false)]; + tensor new_k_21_cast_fp16 = concat(axis = var_7227, interleave = new_k_21_interleave_0, values = (var_7220_cast_fp16, k_padded_cast_fp16))[name = string("new_k_21_cast_fp16")]; + tensor var_7243_begin_0 = const()[name = string("op_7243_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_7243_end_0 = const()[name = string("op_7243_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_7243_end_mask_0 = const()[name = string("op_7243_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7243_cast_fp16 = slice_by_index(begin = var_7243_begin_0, end = var_7243_end_0, end_mask = var_7243_end_mask_0, x = slot_v_21_cast_fp16)[name = string("op_7243_cast_fp16")]; + int32 var_7250 = const()[name = string("op_7250"), val = int32(2)]; + bool new_v_21_interleave_0 = const()[name = string("new_v_21_interleave_0"), val = bool(false)]; + tensor new_v_21_cast_fp16 = concat(axis = var_7250, interleave = new_v_21_interleave_0, values = (var_7243_cast_fp16, v_padded_cast_fp16))[name = string("new_v_21_cast_fp16")]; + tensor var_7256_begin_0 = const()[name = string("op_7256_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7256_end_0 = const()[name = string("op_7256_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_7256_end_mask_0 = const()[name = string("op_7256_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7256_cast_fp16 = slice_by_index(begin = var_7256_begin_0, end = var_7256_end_0, end_mask = var_7256_end_mask_0, x = 
K_sliding_out_17_cast_fp16)[name = string("op_7256_cast_fp16")]; + int32 var_7263 = const()[name = string("op_7263"), val = int32(0)]; + bool K_sliding_out_interleave_0 = const()[name = string("K_sliding_out_interleave_0"), val = bool(false)]; + tensor K_sliding_out = concat(axis = var_7263, interleave = K_sliding_out_interleave_0, values = (var_7256_cast_fp16, new_k_21_cast_fp16))[name = string("K_sliding_out_cast_fp16")]; + tensor var_7269_begin_0 = const()[name = string("op_7269_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7269_end_0 = const()[name = string("op_7269_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_7269_end_mask_0 = const()[name = string("op_7269_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7269_cast_fp16 = slice_by_index(begin = var_7269_begin_0, end = var_7269_end_0, end_mask = var_7269_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("op_7269_cast_fp16")]; + int32 var_7276 = const()[name = string("op_7276"), val = int32(0)]; + bool V_sliding_out_interleave_0 = const()[name = string("V_sliding_out_interleave_0"), val = bool(false)]; + tensor V_sliding_out = concat(axis = var_7276, interleave = V_sliding_out_interleave_0, values = (var_7269_cast_fp16, new_v_21_cast_fp16))[name = string("V_sliding_out_cast_fp16")]; + tensor var_7282_begin_0 = const()[name = string("op_7282_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_7282_end_0 = const()[name = string("op_7282_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_7282_end_mask_0 = const()[name = string("op_7282_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7282_cast_fp16 = slice_by_index(begin = var_7282_begin_0, end = var_7282_end_0, end_mask = var_7282_end_mask_0, x = K_sliding_out)[name = string("op_7282_cast_fp16")]; + tensor K_for_attn_21_begin_0 = const()[name = string("K_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_21_end_0 = const()[name = string("K_for_attn_21_end_0"), val = 
tensor([1, 2, 512, 256])]; + tensor K_for_attn_21_end_mask_0 = const()[name = string("K_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_21_cast_fp16 = slice_by_index(begin = K_for_attn_21_begin_0, end = K_for_attn_21_end_0, end_mask = K_for_attn_21_end_mask_0, x = var_7282_cast_fp16)[name = string("K_for_attn_21_cast_fp16")]; + tensor var_7292_begin_0 = const()[name = string("op_7292_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_7292_end_0 = const()[name = string("op_7292_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_7292_end_mask_0 = const()[name = string("op_7292_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7292_cast_fp16 = slice_by_index(begin = var_7292_begin_0, end = var_7292_end_0, end_mask = var_7292_end_mask_0, x = V_sliding_out)[name = string("op_7292_cast_fp16")]; + tensor V_for_attn_21_begin_0 = const()[name = string("V_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_21_end_0 = const()[name = string("V_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_21_end_mask_0 = const()[name = string("V_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_21_cast_fp16 = slice_by_index(begin = V_for_attn_21_begin_0, end = V_for_attn_21_end_0, end_mask = V_for_attn_21_end_mask_0, x = var_7292_cast_fp16)[name = string("V_for_attn_21_cast_fp16")]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_20_reps_0 = const()[name = string("tile_20_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = K_for_attn_21_cast_fp16)[name = string("transpose_33")]; + tensor tile_20_cast_fp16 = tile(reps = tile_20_reps_0, x = transpose_40_cast_fp16)[name = string("tile_20_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([4, 2, 1, 512, 256])]; + tensor 
reshape_40_cast_fp16 = reshape(shape = concat_42, x = tile_20_cast_fp16)[name = string("reshape_40_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_40_cast_fp16)[name = string("transpose_32")]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_43, x = transpose_41_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_21_reps_0 = const()[name = string("tile_21_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_42_cast_fp16 = transpose(perm = transpose_42_perm_0, x = V_for_attn_21_cast_fp16)[name = string("transpose_31")]; + tensor tile_21_cast_fp16 = tile(reps = tile_21_reps_0, x = transpose_42_cast_fp16)[name = string("tile_21_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_44, x = tile_21_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_43_cast_fp16 = transpose(perm = transpose_43_perm_0, x = reshape_42_cast_fp16)[name = string("transpose_30")]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_45, x = transpose_43_cast_fp16)[name = string("reshape_43_cast_fp16")]; + tensor V_expanded_21_perm_0 = const()[name = string("V_expanded_21_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_41_transpose_x_0 = const()[name = 
string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor transpose_80_cast_fp16 = transpose(perm = transpose_80_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_29")]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = q_131_cast_fp16, y = transpose_80_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask_sliding)[name = string("x_207_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_207_cast_fp16)[name = string("reduce_max_10")]; + tensor var_7327 = sub(x = x_207_cast_fp16, y = reduce_max_10)[name = string("op_7327")]; + tensor var_7333 = exp(x = var_7327)[name = string("op_7333")]; + tensor var_7343_axes_0 = const()[name = string("op_7343_axes_0"), val = tensor([-1])]; + bool var_7343_keep_dims_0 = const()[name = string("op_7343_keep_dims_0"), val = bool(true)]; + tensor var_7343 = reduce_sum(axes = var_7343_axes_0, keep_dims = var_7343_keep_dims_0, x = var_7333)[name = string("op_7343")]; + tensor var_7349_cast_fp16 = real_div(x = var_7333, y = var_7343)[name = string("op_7349_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor V_expanded_21_cast_fp16 = transpose(perm = V_expanded_21_perm_0, x = reshape_43_cast_fp16)[name = string("transpose_28")]; + tensor attn_output_61_cast_fp16 = 
matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_7349_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_7360 = const()[name = string("op_7360"), val = tensor([0, 2, 1, 3])]; + tensor var_7367 = const()[name = string("op_7367"), val = tensor([1, 3, -1])]; + tensor var_7361_cast_fp16 = transpose(perm = var_7360, x = attn_output_61_cast_fp16)[name = string("transpose_27")]; + tensor attn_output_63_cast_fp16 = reshape(shape = var_7367, x = var_7361_cast_fp16)[name = string("attn_output_63_cast_fp16")]; + tensor var_7372 = const()[name = string("op_7372"), val = tensor([0, 2, 1])]; + string var_7388_pad_type_0 = const()[name = string("op_7388_pad_type_0"), val = string("valid")]; + int32 var_7388_groups_0 = const()[name = string("op_7388_groups_0"), val = int32(1)]; + tensor var_7388_strides_0 = const()[name = string("op_7388_strides_0"), val = tensor([1])]; + tensor var_7388_pad_0 = const()[name = string("op_7388_pad_0"), val = tensor([0, 0])]; + tensor var_7388_dilations_0 = const()[name = string("op_7388_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(577403712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580025216))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7373_cast_fp16 = transpose(perm = var_7372, x = attn_output_63_cast_fp16)[name = string("transpose_26")]; + tensor var_7388_cast_fp16 = conv(dilations = var_7388_dilations_0, groups = var_7388_groups_0, pad = var_7388_pad_0, pad_type = var_7388_pad_type_0, strides = var_7388_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_7373_cast_fp16)[name = string("op_7388_cast_fp16")]; + tensor var_7392 = const()[name = string("op_7392"), val = tensor([0, 
2, 1])]; + int32 var_7398 = const()[name = string("op_7398"), val = int32(-1)]; + fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_211_cast_fp16 = transpose(perm = var_7392, x = var_7388_cast_fp16)[name = string("transpose_25")]; + tensor var_7400_cast_fp16 = mul(x = x_211_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_7400_cast_fp16")]; + bool input_315_interleave_0 = const()[name = string("input_315_interleave_0"), val = bool(false)]; + tensor input_315_cast_fp16 = concat(axis = var_7398, interleave = input_315_interleave_0, values = (x_211_cast_fp16, var_7400_cast_fp16))[name = string("input_315_cast_fp16")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_7395_to_fp16 = const()[name = string("op_7395_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_7395_to_fp16, x = input_315_cast_fp16)[name = string("normed_297_cast_fp16")]; + tensor var_7405_split_sizes_0 = const()[name = string("op_7405_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7405_axis_0 = const()[name = string("op_7405_axis_0"), val = int32(-1)]; + tensor var_7405_cast_fp16_0, tensor var_7405_cast_fp16_1 = split(axis = var_7405_axis_0, split_sizes = var_7405_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_7405_cast_fp16")]; + tensor layers_10_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580027840)))]; + tensor attn_output_65_cast_fp16 = mul(x = var_7405_cast_fp16_0, y = layers_10_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_199_cast_fp16, y = attn_output_65_cast_fp16)[name = string("x_213_cast_fp16")]; + int32 
var_7414 = const()[name = string("op_7414"), val = int32(-1)]; + fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7416_cast_fp16 = mul(x = x_213_cast_fp16, y = const_127_promoted_to_fp16)[name = string("op_7416_cast_fp16")]; + bool input_317_interleave_0 = const()[name = string("input_317_interleave_0"), val = bool(false)]; + tensor input_317_cast_fp16 = concat(axis = var_7414, interleave = input_317_interleave_0, values = (x_213_cast_fp16, var_7416_cast_fp16))[name = string("input_317_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_7411_to_fp16 = const()[name = string("op_7411_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_7411_to_fp16, x = input_317_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor var_7421_split_sizes_0 = const()[name = string("op_7421_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7421_axis_0 = const()[name = string("op_7421_axis_0"), val = int32(-1)]; + tensor var_7421_cast_fp16_0, tensor var_7421_cast_fp16_1 = split(axis = var_7421_axis_0, split_sizes = var_7421_split_sizes_0, x = normed_301_cast_fp16)[name = string("op_7421_cast_fp16")]; + tensor layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580033024)))]; + tensor h_63_cast_fp16 = mul(x = var_7421_cast_fp16_0, y = layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_63_cast_fp16")]; + tensor var_7432 = const()[name = string("op_7432"), val = tensor([0, 2, 1])]; + tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; + tensor var_7433 = transpose(perm = var_7432, x = h_63_cast_fp16)[name = string("transpose_24")]; + 
tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_7433)[name = string("input_319")]; + string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; + tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; + tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; + int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; + tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight_palettized, x = input_319)[name = string("gate_41")]; + string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; + tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; + tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; + int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; + tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight_palettized, x = input_319)[name = string("up_21")]; + string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; + tensor input_321 = mul(x = gate_43, y = up_21)[name = string("input_321")]; + string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; + tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = 
tensor([1, 1])]; + tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; + tensor mlp_out_21 = conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_10_mlp_down_proj_weight_palettized, x = input_321)[name = string("mlp_out_21")]; + tensor var_7473_axes_0 = const()[name = string("op_7473_axes_0"), val = tensor([2])]; + tensor var_7473 = squeeze(axes = var_7473_axes_0, x = mlp_out_21)[name = string("op_7473")]; + tensor var_7477 = const()[name = string("op_7477"), val = tensor([0, 2, 1])]; + int32 var_7483 = const()[name = string("op_7483"), val = int32(-1)]; + fp16 const_128_promoted = const()[name = string("const_128_promoted"), val = fp16(-0x1p+0)]; + tensor x_215 = transpose(perm = var_7477, x = var_7473)[name = string("transpose_23")]; + tensor var_7485 = mul(x = x_215, y = const_128_promoted)[name = string("op_7485")]; + bool input_323_interleave_0 = const()[name = string("input_323_interleave_0"), val = bool(false)]; + tensor input_323 = concat(axis = var_7483, interleave = input_323_interleave_0, values = (x_215, var_7485))[name = string("input_323")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_7480_to_fp16 = const()[name = string("op_7480_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_7480_to_fp16, x = input_323)[name = string("normed_305_cast_fp16")]; + tensor var_7490_split_sizes_0 = const()[name = string("op_7490_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7490_axis_0 = const()[name = string("op_7490_axis_0"), val = int32(-1)]; + tensor var_7490_0, 
tensor var_7490_1 = split(axis = var_7490_axis_0, split_sizes = var_7490_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_7490")]; + tensor hidden_states_103 = mul(x = var_7490_0, y = layers_10_post_feedforward_layernorm_weight)[name = string("hidden_states_103")]; + tensor hidden_states_105_cast_fp16 = add(x = x_213_cast_fp16, y = hidden_states_103)[name = string("hidden_states_105_cast_fp16")]; + tensor per_layer_slice_21_begin_0 = const()[name = string("per_layer_slice_21_begin_0"), val = tensor([0, 0, 2560])]; + tensor per_layer_slice_21_end_0 = const()[name = string("per_layer_slice_21_end_0"), val = tensor([1, 3, 2816])]; + tensor per_layer_slice_21_end_mask_0 = const()[name = string("per_layer_slice_21_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_21_cast_fp16 = slice_by_index(begin = per_layer_slice_21_begin_0, end = per_layer_slice_21_end_0, end_mask = per_layer_slice_21_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_21_cast_fp16")]; + tensor var_7518 = const()[name = string("op_7518"), val = tensor([0, 2, 1])]; + tensor input_325_axes_0 = const()[name = string("input_325_axes_0"), val = tensor([2])]; + tensor var_7519 = transpose(perm = var_7518, x = hidden_states_105_cast_fp16)[name = string("transpose_22")]; + tensor input_325 = expand_dims(axes = input_325_axes_0, x = var_7519)[name = string("input_325")]; + string gated_61_pad_type_0 = const()[name = string("gated_61_pad_type_0"), val = string("valid")]; + tensor gated_61_strides_0 = const()[name = string("gated_61_strides_0"), val = tensor([1, 1])]; + tensor gated_61_pad_0 = const()[name = string("gated_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_61_dilations_0 = const()[name = string("gated_61_dilations_0"), val = tensor([1, 1])]; + int32 gated_61_groups_0 = const()[name = string("gated_61_groups_0"), val = int32(1)]; + tensor gated_61 = conv(dilations = gated_61_dilations_0, groups = gated_61_groups_0, pad = gated_61_pad_0, 
pad_type = gated_61_pad_type_0, strides = gated_61_strides_0, weight = layers_10_per_layer_input_gate_weight_palettized, x = input_325)[name = string("gated_61")]; + string gated_63_mode_0 = const()[name = string("gated_63_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_63 = gelu(mode = gated_63_mode_0, x = gated_61)[name = string("gated_63")]; + tensor var_7538 = const()[name = string("op_7538"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_21_axes_0 = const()[name = string("per_layer_slice_conv_21_axes_0"), val = tensor([2])]; + tensor var_7539_cast_fp16 = transpose(perm = var_7538, x = per_layer_slice_21_cast_fp16)[name = string("transpose_21")]; + tensor per_layer_slice_conv_21_cast_fp16 = expand_dims(axes = per_layer_slice_conv_21_axes_0, x = var_7539_cast_fp16)[name = string("per_layer_slice_conv_21_cast_fp16")]; + tensor input_327_cast_fp16 = mul(x = gated_63, y = per_layer_slice_conv_21_cast_fp16)[name = string("input_327_cast_fp16")]; + string gated_65_pad_type_0 = const()[name = string("gated_65_pad_type_0"), val = string("valid")]; + tensor gated_65_strides_0 = const()[name = string("gated_65_strides_0"), val = tensor([1, 1])]; + tensor gated_65_pad_0 = const()[name = string("gated_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_65_dilations_0 = const()[name = string("gated_65_dilations_0"), val = tensor([1, 1])]; + int32 gated_65_groups_0 = const()[name = string("gated_65_groups_0"), val = int32(1)]; + tensor layers_10_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580038208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580365952))))[name = string("layers_10_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_65_cast_fp16 = conv(dilations = gated_65_dilations_0, groups = gated_65_groups_0, pad = gated_65_pad_0, pad_type = 
gated_65_pad_type_0, strides = gated_65_strides_0, weight = layers_10_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_327_cast_fp16)[name = string("gated_65_cast_fp16")]; + tensor var_7555_axes_0 = const()[name = string("op_7555_axes_0"), val = tensor([2])]; + tensor var_7555_cast_fp16 = squeeze(axes = var_7555_axes_0, x = gated_65_cast_fp16)[name = string("op_7555_cast_fp16")]; + tensor var_7559 = const()[name = string("op_7559"), val = tensor([0, 2, 1])]; + int32 var_7565 = const()[name = string("op_7565"), val = int32(-1)]; + fp16 const_129_promoted_to_fp16 = const()[name = string("const_129_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_217_cast_fp16 = transpose(perm = var_7559, x = var_7555_cast_fp16)[name = string("transpose_20")]; + tensor var_7567_cast_fp16 = mul(x = x_217_cast_fp16, y = const_129_promoted_to_fp16)[name = string("op_7567_cast_fp16")]; + bool input_329_interleave_0 = const()[name = string("input_329_interleave_0"), val = bool(false)]; + tensor input_329_cast_fp16 = concat(axis = var_7565, interleave = input_329_interleave_0, values = (x_217_cast_fp16, var_7567_cast_fp16))[name = string("input_329_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_7562_to_fp16 = const()[name = string("op_7562_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_7562_to_fp16, x = input_329_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor var_7572_split_sizes_0 = const()[name = string("op_7572_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7572_axis_0 = const()[name = string("op_7572_axis_0"), val = int32(-1)]; + tensor var_7572_cast_fp16_0, tensor var_7572_cast_fp16_1 = split(axis = var_7572_axis_0, split_sizes = var_7572_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_7572_cast_fp16")]; + tensor layers_10_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_10_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580368576)))]; + tensor hidden_states_109_cast_fp16 = mul(x = var_7572_cast_fp16_0, y = layers_10_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_109_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = hidden_states_109_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = tensor([0x1.3ep-1])]; + tensor x_219_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_130_promoted_to_fp16)[name = string("x_219_cast_fp16")]; + int32 var_7587 = const()[name = string("op_7587"), val = int32(-1)]; + fp16 const_131_promoted_to_fp16 = const()[name = string("const_131_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7589_cast_fp16 = mul(x = x_219_cast_fp16, y = const_131_promoted_to_fp16)[name = string("op_7589_cast_fp16")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331_cast_fp16 = concat(axis = var_7587, interleave = input_331_interleave_0, values = (x_219_cast_fp16, var_7589_cast_fp16))[name = string("input_331_cast_fp16")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_7584_to_fp16 = const()[name = string("op_7584_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_7584_to_fp16, x = input_331_cast_fp16)[name = string("normed_313_cast_fp16")]; + tensor var_7594_split_sizes_0 = const()[name = string("op_7594_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7594_axis_0 = const()[name = string("op_7594_axis_0"), val = int32(-1)]; + tensor var_7594_cast_fp16_0, tensor var_7594_cast_fp16_1 = split(axis = var_7594_axis_0, split_sizes = 
var_7594_split_sizes_0, x = normed_313_cast_fp16)[name = string("op_7594_cast_fp16")]; + tensor layers_11_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580373760)))]; + tensor h_67_cast_fp16 = mul(x = var_7594_cast_fp16_0, y = layers_11_input_layernorm_weight_promoted_to_fp16)[name = string("h_67_cast_fp16")]; + tensor var_7600 = const()[name = string("op_7600"), val = tensor([0, 2, 1])]; + tensor var_7603_axes_0 = const()[name = string("op_7603_axes_0"), val = tensor([2])]; + tensor var_7601_cast_fp16 = transpose(perm = var_7600, x = h_67_cast_fp16)[name = string("transpose_19")]; + tensor var_7603_cast_fp16 = expand_dims(axes = var_7603_axes_0, x = var_7601_cast_fp16)[name = string("op_7603_cast_fp16")]; + string q_133_pad_type_0 = const()[name = string("q_133_pad_type_0"), val = string("valid")]; + tensor q_133_strides_0 = const()[name = string("q_133_strides_0"), val = tensor([1, 1])]; + tensor q_133_pad_0 = const()[name = string("q_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_133_dilations_0 = const()[name = string("q_133_dilations_0"), val = tensor([1, 1])]; + int32 q_133_groups_0 = const()[name = string("q_133_groups_0"), val = int32(1)]; + tensor q_133 = conv(dilations = q_133_dilations_0, groups = q_133_groups_0, pad = q_133_pad_0, pad_type = q_133_pad_type_0, strides = q_133_strides_0, weight = layers_11_self_attn_q_proj_weight_palettized, x = var_7603_cast_fp16)[name = string("q_133")]; + tensor var_7624 = const()[name = string("op_7624"), val = tensor([1, 8, 512, 3])]; + tensor var_7625 = reshape(shape = var_7624, x = q_133)[name = string("op_7625")]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_7648 = const()[name = string("op_7648"), val = tensor([3, 8, 512])]; + tensor transpose_81 = transpose(perm = 
transpose_81_perm_0, x = var_7625)[name = string("transpose_18")]; + tensor x_221 = reshape(shape = var_7648, x = transpose_81)[name = string("x_221")]; + int32 var_7654 = const()[name = string("op_7654"), val = int32(-1)]; + fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; + tensor var_7656 = mul(x = x_221, y = const_132_promoted)[name = string("op_7656")]; + bool input_335_interleave_0 = const()[name = string("input_335_interleave_0"), val = bool(false)]; + tensor input_335 = concat(axis = var_7654, interleave = input_335_interleave_0, values = (x_221, var_7656))[name = string("input_335")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_7651_to_fp16 = const()[name = string("op_7651_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_7651_to_fp16, x = input_335)[name = string("normed_317_cast_fp16")]; + tensor var_7661_split_sizes_0 = const()[name = string("op_7661_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7661_axis_0 = const()[name = string("op_7661_axis_0"), val = int32(-1)]; + tensor var_7661_0, tensor var_7661_1 = split(axis = var_7661_axis_0, split_sizes = var_7661_split_sizes_0, x = normed_317_cast_fp16)[name = string("op_7661")]; + tensor q_137 = mul(x = var_7661_0, y = layers_11_self_attn_q_norm_weight)[name = string("q_137")]; + tensor var_7668 = const()[name = string("op_7668"), val = tensor([1, 3, 8, 512])]; + tensor var_7669 = reshape(shape = var_7668, x = q_137)[name = string("op_7669")]; + tensor var_7674 = const()[name = string("op_7674"), val = tensor([0, 2, 1, 3])]; + tensor q_139 = transpose(perm = var_7674, x = var_7669)[name = string("transpose_17")]; + tensor var_7676_cast_fp16 = mul(x = q_139, y = cos_f)[name = string("op_7676_cast_fp16")]; + tensor var_7677_split_sizes_0 = const()[name = string("op_7677_split_sizes_0"), val = tensor([256, 256])]; + int32 
var_7677_axis_0 = const()[name = string("op_7677_axis_0"), val = int32(-1)]; + tensor var_7677_0, tensor var_7677_1 = split(axis = var_7677_axis_0, split_sizes = var_7677_split_sizes_0, x = q_139)[name = string("op_7677")]; + fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; + tensor var_7679 = mul(x = var_7677_1, y = const_133_promoted)[name = string("op_7679")]; + int32 var_7681 = const()[name = string("op_7681"), val = int32(-1)]; + bool var_7682_interleave_0 = const()[name = string("op_7682_interleave_0"), val = bool(false)]; + tensor var_7682 = concat(axis = var_7681, interleave = var_7682_interleave_0, values = (var_7679, var_7677_0))[name = string("op_7682")]; + tensor var_7683_cast_fp16 = mul(x = var_7682, y = sin_f)[name = string("op_7683_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_7676_cast_fp16, y = var_7683_cast_fp16)[name = string("q_cast_fp16")]; + string k_69_pad_type_0 = const()[name = string("k_69_pad_type_0"), val = string("valid")]; + tensor k_69_strides_0 = const()[name = string("k_69_strides_0"), val = tensor([1, 1])]; + tensor k_69_pad_0 = const()[name = string("k_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_69_dilations_0 = const()[name = string("k_69_dilations_0"), val = tensor([1, 1])]; + int32 k_69_groups_0 = const()[name = string("k_69_groups_0"), val = int32(1)]; + tensor k_69 = conv(dilations = k_69_dilations_0, groups = k_69_groups_0, pad = k_69_pad_0, pad_type = k_69_pad_type_0, strides = k_69_strides_0, weight = layers_11_self_attn_k_proj_weight_palettized, x = var_7603_cast_fp16)[name = string("k_69")]; + tensor var_7701 = const()[name = string("op_7701"), val = tensor([1, 2, 512, 3])]; + tensor var_7702 = reshape(shape = var_7701, x = k_69)[name = string("op_7702")]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_25_pad_type_0 = const()[name = string("v_25_pad_type_0"), val = string("valid")]; + tensor 
v_25_strides_0 = const()[name = string("v_25_strides_0"), val = tensor([1, 1])]; + tensor v_25_pad_0 = const()[name = string("v_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_25_dilations_0 = const()[name = string("v_25_dilations_0"), val = tensor([1, 1])]; + int32 v_25_groups_0 = const()[name = string("v_25_groups_0"), val = int32(1)]; + tensor v_25 = conv(dilations = v_25_dilations_0, groups = v_25_groups_0, pad = v_25_pad_0, pad_type = v_25_pad_type_0, strides = v_25_strides_0, weight = layers_11_self_attn_v_proj_weight_palettized, x = var_7603_cast_fp16)[name = string("v_25")]; + tensor var_7729 = const()[name = string("op_7729"), val = tensor([1, 2, 512, 3])]; + tensor var_7730 = reshape(shape = var_7729, x = v_25)[name = string("op_7730")]; + tensor var_7735 = const()[name = string("op_7735"), val = tensor([0, 1, 3, 2])]; + tensor var_7753 = const()[name = string("op_7753"), val = tensor([3, 2, 512])]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = var_7702)[name = string("transpose_16")]; + tensor x_223 = reshape(shape = var_7753, x = transpose_82)[name = string("x_223")]; + int32 var_7759 = const()[name = string("op_7759"), val = int32(-1)]; + fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; + tensor var_7761 = mul(x = x_223, y = const_134_promoted)[name = string("op_7761")]; + bool input_337_interleave_0 = const()[name = string("input_337_interleave_0"), val = bool(false)]; + tensor input_337 = concat(axis = var_7759, interleave = input_337_interleave_0, values = (x_223, var_7761))[name = string("input_337")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_7756_to_fp16 = const()[name = string("op_7756_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_7756_to_fp16, x = input_337)[name = string("normed_321_cast_fp16")]; + tensor var_7766_split_sizes_0 = const()[name = 
string("op_7766_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7766_axis_0 = const()[name = string("op_7766_axis_0"), val = int32(-1)]; + tensor var_7766_0, tensor var_7766_1 = split(axis = var_7766_axis_0, split_sizes = var_7766_split_sizes_0, x = normed_321_cast_fp16)[name = string("op_7766")]; + tensor k_73 = mul(x = var_7766_0, y = layers_11_self_attn_k_norm_weight)[name = string("k_73")]; + tensor var_7773 = const()[name = string("op_7773"), val = tensor([1, 3, 2, 512])]; + tensor var_7774 = reshape(shape = var_7773, x = k_73)[name = string("op_7774")]; + tensor var_7779 = const()[name = string("op_7779"), val = tensor([0, 2, 1, 3])]; + fp16 var_7781_promoted = const()[name = string("op_7781_promoted"), val = fp16(0x1p+1)]; + tensor var_7736 = transpose(perm = var_7735, x = var_7730)[name = string("transpose_15")]; + tensor var_7782 = pow(x = var_7736, y = var_7781_promoted)[name = string("op_7782")]; + tensor var_7787_axes_0 = const()[name = string("op_7787_axes_0"), val = tensor([-1])]; + bool var_7787_keep_dims_0 = const()[name = string("op_7787_keep_dims_0"), val = bool(true)]; + tensor var_7787 = reduce_mean(axes = var_7787_axes_0, keep_dims = var_7787_keep_dims_0, x = var_7782)[name = string("op_7787")]; + fp16 var_7789_to_fp16 = const()[name = string("op_7789_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_cast_fp16 = add(x = var_7787, y = var_7789_to_fp16)[name = string("mean_sq_cast_fp16")]; + fp32 var_7791_epsilon_0 = const()[name = string("op_7791_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7791_cast_fp16 = rsqrt(epsilon = var_7791_epsilon_0, x = mean_sq_cast_fp16)[name = string("op_7791_cast_fp16")]; + tensor v_cast_fp16 = mul(x = var_7736, y = var_7791_cast_fp16)[name = string("v_cast_fp16")]; + tensor q_141 = transpose(perm = var_7779, x = var_7774)[name = string("transpose_14")]; + tensor var_7793_cast_fp16 = mul(x = q_141, y = cos_f)[name = string("op_7793_cast_fp16")]; + tensor var_7794_split_sizes_0 = const()[name = 
string("op_7794_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7794_axis_0 = const()[name = string("op_7794_axis_0"), val = int32(-1)]; + tensor var_7794_0, tensor var_7794_1 = split(axis = var_7794_axis_0, split_sizes = var_7794_split_sizes_0, x = q_141)[name = string("op_7794")]; + fp16 const_135_promoted = const()[name = string("const_135_promoted"), val = fp16(-0x1p+0)]; + tensor var_7796 = mul(x = var_7794_1, y = const_135_promoted)[name = string("op_7796")]; + int32 var_7798 = const()[name = string("op_7798"), val = int32(-1)]; + bool var_7799_interleave_0 = const()[name = string("op_7799_interleave_0"), val = bool(false)]; + tensor var_7799 = concat(axis = var_7798, interleave = var_7799_interleave_0, values = (var_7796, var_7794_0))[name = string("op_7799")]; + tensor var_7800_cast_fp16 = mul(x = var_7799, y = sin_f)[name = string("op_7800_cast_fp16")]; + tensor k_cast_fp16 = add(x = var_7793_cast_fp16, y = var_7800_cast_fp16)[name = string("k_cast_fp16")]; + bool k_scattered_transpose_x_0 = const()[name = string("k_scattered_transpose_x_0"), val = bool(false)]; + bool k_scattered_transpose_y_0 = const()[name = string("k_scattered_transpose_y_0"), val = bool(false)]; + tensor k_scattered_cast_fp16 = matmul(transpose_x = k_scattered_transpose_x_0, transpose_y = k_scattered_transpose_y_0, x = var_4120_cast_fp16, y = k_cast_fp16)[name = string("k_scattered_cast_fp16")]; + bool v_scattered_transpose_x_0 = const()[name = string("v_scattered_transpose_x_0"), val = bool(false)]; + bool v_scattered_transpose_y_0 = const()[name = string("v_scattered_transpose_y_0"), val = bool(false)]; + tensor v_scattered_cast_fp16 = matmul(transpose_x = v_scattered_transpose_x_0, transpose_y = v_scattered_transpose_y_0, x = var_4120_cast_fp16, y = v_cast_fp16)[name = string("v_scattered_cast_fp16")]; + tensor slot_k_begin_0 = const()[name = string("slot_k_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_k_end_0 = const()[name = string("slot_k_end_0"), val = 
tensor([1, 2, 2048, 512])]; + tensor slot_k_end_mask_0 = const()[name = string("slot_k_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_k_cast_fp16 = slice_by_index(begin = slot_k_begin_0, end = slot_k_end_0, end_mask = slot_k_end_mask_0, x = K_full_out_1_cast_fp16)[name = string("slot_k_cast_fp16")]; + tensor slot_v_begin_0 = const()[name = string("slot_v_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_v_end_0 = const()[name = string("slot_v_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_v_end_mask_0 = const()[name = string("slot_v_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_v_cast_fp16 = slice_by_index(begin = slot_v_begin_0, end = slot_v_end_0, end_mask = slot_v_end_mask_0, x = V_full_out_1_cast_fp16)[name = string("slot_v_cast_fp16")]; + tensor var_7837_cast_fp16 = mul(x = slot_k_cast_fp16, y = var_4147_cast_fp16)[name = string("op_7837_cast_fp16")]; + tensor new_k_cast_fp16 = add(x = var_7837_cast_fp16, y = k_scattered_cast_fp16)[name = string("new_k_cast_fp16")]; + tensor var_7843_cast_fp16 = mul(x = slot_v_cast_fp16, y = var_4147_cast_fp16)[name = string("op_7843_cast_fp16")]; + tensor new_v_cast_fp16 = add(x = var_7843_cast_fp16, y = v_scattered_cast_fp16)[name = string("new_v_cast_fp16")]; + int32 var_7857 = const()[name = string("op_7857"), val = int32(0)]; + bool K_full_out_interleave_0 = const()[name = string("K_full_out_interleave_0"), val = bool(false)]; + tensor K_full_out = concat(axis = var_7857, interleave = K_full_out_interleave_0, values = (var_4187_cast_fp16, new_k_cast_fp16))[name = string("K_full_out_cast_fp16")]; + int32 var_7870 = const()[name = string("op_7870"), val = int32(0)]; + bool V_full_out_interleave_0 = const()[name = string("V_full_out_interleave_0"), val = bool(false)]; + tensor V_full_out = concat(axis = var_7870, interleave = V_full_out_interleave_0, values = (var_4197_cast_fp16, new_v_cast_fp16))[name = string("V_full_out_cast_fp16")]; + tensor var_7876_begin_0 = 
const()[name = string("op_7876_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_7876_end_0 = const()[name = string("op_7876_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_7876_end_mask_0 = const()[name = string("op_7876_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7876_cast_fp16 = slice_by_index(begin = var_7876_begin_0, end = var_7876_end_0, end_mask = var_7876_end_mask_0, x = K_full_out)[name = string("op_7876_cast_fp16")]; + tensor var_7886_begin_0 = const()[name = string("op_7886_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_7886_end_0 = const()[name = string("op_7886_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_7886_end_mask_0 = const()[name = string("op_7886_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7886_cast_fp16 = slice_by_index(begin = var_7886_begin_0, end = var_7886_end_0, end_mask = var_7886_end_mask_0, x = V_full_out)[name = string("op_7886_cast_fp16")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_22_reps_0 = const()[name = string("tile_22_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = var_7876_cast_fp16)[name = string("transpose_13")]; + tensor tile_22_cast_fp16 = tile(reps = tile_22_reps_0, x = transpose_44_cast_fp16)[name = string("tile_22_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_48, x = tile_22_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_12")]; + tensor reshape_45_cast_fp16 = 
reshape(shape = concat_49, x = transpose_45_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_23_reps_0 = const()[name = string("tile_23_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_46_cast_fp16 = transpose(perm = transpose_46_perm_0, x = var_7886_cast_fp16)[name = string("transpose_11")]; + tensor tile_23_cast_fp16 = tile(reps = tile_23_reps_0, x = transpose_46_cast_fp16)[name = string("tile_23_cast_fp16")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_50, x = tile_23_cast_fp16)[name = string("reshape_46_cast_fp16")]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_51 = const()[name = string("concat_51"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_46_cast_fp16)[name = string("transpose_10")]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_51, x = transpose_47_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor V_expanded_perm_0 = const()[name = string("V_expanded_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_45_transpose_x_0 = const()[name = string("attn_weights_45_transpose_x_0"), val = bool(false)]; + bool attn_weights_45_transpose_y_0 = const()[name = string("attn_weights_45_transpose_y_0"), val = bool(false)]; + tensor transpose_83_cast_fp16 = transpose(perm = transpose_83_perm_0, x = reshape_45_cast_fp16)[name = string("transpose_9")]; + tensor attn_weights_45_cast_fp16 = matmul(transpose_x = attn_weights_45_transpose_x_0, transpose_y = attn_weights_45_transpose_y_0, x = q_cast_fp16, y = transpose_83_cast_fp16)[name = 
string("attn_weights_45_cast_fp16")]; + tensor x_227_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask_full)[name = string("x_227_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor reduce_max_11 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_227_cast_fp16)[name = string("reduce_max_11")]; + tensor var_7921 = sub(x = x_227_cast_fp16, y = reduce_max_11)[name = string("op_7921")]; + tensor var_7927 = exp(x = var_7921)[name = string("op_7927")]; + tensor var_7937_axes_0 = const()[name = string("op_7937_axes_0"), val = tensor([-1])]; + bool var_7937_keep_dims_0 = const()[name = string("op_7937_keep_dims_0"), val = bool(true)]; + tensor var_7937 = reduce_sum(axes = var_7937_axes_0, keep_dims = var_7937_keep_dims_0, x = var_7927)[name = string("op_7937")]; + tensor var_7943_cast_fp16 = real_div(x = var_7927, y = var_7937)[name = string("op_7943_cast_fp16")]; + bool attn_output_67_transpose_x_0 = const()[name = string("attn_output_67_transpose_x_0"), val = bool(false)]; + bool attn_output_67_transpose_y_0 = const()[name = string("attn_output_67_transpose_y_0"), val = bool(false)]; + tensor V_expanded_cast_fp16 = transpose(perm = V_expanded_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_8")]; + tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_0, transpose_y = attn_output_67_transpose_y_0, x = var_7943_cast_fp16, y = V_expanded_cast_fp16)[name = string("attn_output_67_cast_fp16")]; + tensor var_7954 = const()[name = string("op_7954"), val = tensor([0, 2, 1, 3])]; + tensor var_7961 = const()[name = string("op_7961"), val = tensor([1, 3, -1])]; + tensor var_7955_cast_fp16 = transpose(perm = var_7954, x = attn_output_67_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_69_cast_fp16 = 
reshape(shape = var_7961, x = var_7955_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_7966 = const()[name = string("op_7966"), val = tensor([0, 2, 1])]; + string var_7982_pad_type_0 = const()[name = string("op_7982_pad_type_0"), val = string("valid")]; + int32 var_7982_groups_0 = const()[name = string("op_7982_groups_0"), val = int32(1)]; + tensor var_7982_strides_0 = const()[name = string("op_7982_strides_0"), val = tensor([1])]; + tensor var_7982_pad_0 = const()[name = string("op_7982_pad_0"), val = tensor([0, 0])]; + tensor var_7982_dilations_0 = const()[name = string("op_7982_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580378944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585621888))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7967_cast_fp16 = transpose(perm = var_7966, x = attn_output_69_cast_fp16)[name = string("transpose_6")]; + tensor var_7982_cast_fp16 = conv(dilations = var_7982_dilations_0, groups = var_7982_groups_0, pad = var_7982_pad_0, pad_type = var_7982_pad_type_0, strides = var_7982_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7967_cast_fp16)[name = string("op_7982_cast_fp16")]; + tensor var_7986 = const()[name = string("op_7986"), val = tensor([0, 2, 1])]; + int32 var_7992 = const()[name = string("op_7992"), val = int32(-1)]; + fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_231_cast_fp16 = transpose(perm = var_7986, x = var_7982_cast_fp16)[name = string("transpose_5")]; + tensor var_7994_cast_fp16 = mul(x = x_231_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_7994_cast_fp16")]; + bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = 
bool(false)]; + tensor input_341_cast_fp16 = concat(axis = var_7992, interleave = input_341_interleave_0, values = (x_231_cast_fp16, var_7994_cast_fp16))[name = string("input_341_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_7989_to_fp16 = const()[name = string("op_7989_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_7989_to_fp16, x = input_341_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor var_7999_split_sizes_0 = const()[name = string("op_7999_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7999_axis_0 = const()[name = string("op_7999_axis_0"), val = int32(-1)]; + tensor var_7999_cast_fp16_0, tensor var_7999_cast_fp16_1 = split(axis = var_7999_axis_0, split_sizes = var_7999_split_sizes_0, x = normed_325_cast_fp16)[name = string("op_7999_cast_fp16")]; + tensor layers_11_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585624512)))]; + tensor attn_output_cast_fp16 = mul(x = var_7999_cast_fp16_0, y = layers_11_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_233_cast_fp16 = add(x = x_219_cast_fp16, y = attn_output_cast_fp16)[name = string("x_233_cast_fp16")]; + int32 var_8008 = const()[name = string("op_8008"), val = int32(-1)]; + fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8010_cast_fp16 = mul(x = x_233_cast_fp16, y = const_137_promoted_to_fp16)[name = string("op_8010_cast_fp16")]; + bool input_343_interleave_0 = const()[name = string("input_343_interleave_0"), val = bool(false)]; + tensor input_343_cast_fp16 = concat(axis = var_8008, interleave = input_343_interleave_0, values = (x_233_cast_fp16, 
var_8010_cast_fp16))[name = string("input_343_cast_fp16")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_8005_to_fp16 = const()[name = string("op_8005_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_8005_to_fp16, x = input_343_cast_fp16)[name = string("normed_329_cast_fp16")]; + tensor var_8015_split_sizes_0 = const()[name = string("op_8015_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8015_axis_0 = const()[name = string("op_8015_axis_0"), val = int32(-1)]; + tensor var_8015_cast_fp16_0, tensor var_8015_cast_fp16_1 = split(axis = var_8015_axis_0, split_sizes = var_8015_split_sizes_0, x = normed_329_cast_fp16)[name = string("op_8015_cast_fp16")]; + tensor layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585629696)))]; + tensor h_69_cast_fp16 = mul(x = var_8015_cast_fp16_0, y = layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_69_cast_fp16")]; + tensor var_8026 = const()[name = string("op_8026"), val = tensor([0, 2, 1])]; + tensor input_345_axes_0 = const()[name = string("input_345_axes_0"), val = tensor([2])]; + tensor var_8027 = transpose(perm = var_8026, x = h_69_cast_fp16)[name = string("transpose_4")]; + tensor input_345 = expand_dims(axes = input_345_axes_0, x = var_8027)[name = string("input_345")]; + string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; + tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; + tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; + int32 gate_45_groups_0 = 
const()[name = string("gate_45_groups_0"), val = int32(1)]; + tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight_palettized, x = input_345)[name = string("gate_45")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_11_mlp_up_proj_weight_palettized, x = input_345)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_45)[name = string("gate")]; + tensor input_347 = mul(x = gate, y = up)[name = string("input_347")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_11_mlp_down_proj_weight_palettized, x = input_347)[name = string("mlp_out")]; + tensor var_8067_axes_0 = const()[name = 
string("op_8067_axes_0"), val = tensor([2])]; + tensor var_8067 = squeeze(axes = var_8067_axes_0, x = mlp_out)[name = string("op_8067")]; + tensor var_8071 = const()[name = string("op_8071"), val = tensor([0, 2, 1])]; + int32 var_8077 = const()[name = string("op_8077"), val = int32(-1)]; + fp16 const_138_promoted = const()[name = string("const_138_promoted"), val = fp16(-0x1p+0)]; + tensor x_235 = transpose(perm = var_8071, x = var_8067)[name = string("transpose_3")]; + tensor var_8079 = mul(x = x_235, y = const_138_promoted)[name = string("op_8079")]; + bool input_349_interleave_0 = const()[name = string("input_349_interleave_0"), val = bool(false)]; + tensor input_349 = concat(axis = var_8077, interleave = input_349_interleave_0, values = (x_235, var_8079))[name = string("input_349")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_8074_to_fp16 = const()[name = string("op_8074_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_8074_to_fp16, x = input_349)[name = string("normed_333_cast_fp16")]; + tensor var_8084_split_sizes_0 = const()[name = string("op_8084_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8084_axis_0 = const()[name = string("op_8084_axis_0"), val = int32(-1)]; + tensor var_8084_0, tensor var_8084_1 = split(axis = var_8084_axis_0, split_sizes = var_8084_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_8084")]; + tensor hidden_states_113 = mul(x = var_8084_0, y = layers_11_post_feedforward_layernorm_weight)[name = string("hidden_states_113")]; + tensor hidden_states_115_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_113)[name = string("hidden_states_115_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 2816])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 3, 3072])]; + tensor 
per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined_out)[name = string("per_layer_slice_cast_fp16")]; + tensor var_8112 = const()[name = string("op_8112"), val = tensor([0, 2, 1])]; + tensor input_351_axes_0 = const()[name = string("input_351_axes_0"), val = tensor([2])]; + tensor var_8113 = transpose(perm = var_8112, x = hidden_states_115_cast_fp16)[name = string("transpose_2")]; + tensor input_351 = expand_dims(axes = input_351_axes_0, x = var_8113)[name = string("input_351")]; + string gated_67_pad_type_0 = const()[name = string("gated_67_pad_type_0"), val = string("valid")]; + tensor gated_67_strides_0 = const()[name = string("gated_67_strides_0"), val = tensor([1, 1])]; + tensor gated_67_pad_0 = const()[name = string("gated_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_67_dilations_0 = const()[name = string("gated_67_dilations_0"), val = tensor([1, 1])]; + int32 gated_67_groups_0 = const()[name = string("gated_67_groups_0"), val = int32(1)]; + tensor gated_67 = conv(dilations = gated_67_dilations_0, groups = gated_67_groups_0, pad = gated_67_pad_0, pad_type = gated_67_pad_type_0, strides = gated_67_strides_0, weight = layers_11_per_layer_input_gate_weight_palettized, x = input_351)[name = string("gated_67")]; + string gated_69_mode_0 = const()[name = string("gated_69_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_69 = gelu(mode = gated_69_mode_0, x = gated_67)[name = string("gated_69")]; + tensor var_8132 = const()[name = string("op_8132"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_8133_cast_fp16 = transpose(perm = var_8132, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + 
tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_8133_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_353_cast_fp16 = mul(x = gated_69, y = per_layer_slice_conv_cast_fp16)[name = string("input_353_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_11_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585634880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585962624))))[name = string("layers_11_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_11_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_353_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_8149_axes_0 = const()[name = string("op_8149_axes_0"), val = tensor([2])]; + tensor var_8149_cast_fp16 = squeeze(axes = var_8149_axes_0, x = gated_cast_fp16)[name = string("op_8149_cast_fp16")]; + tensor var_8153 = const()[name = string("op_8153"), val = tensor([0, 2, 1])]; + int32 var_8159 = const()[name = string("op_8159"), val = int32(-1)]; + fp16 const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_8153, x = var_8149_cast_fp16)[name = 
string("transpose_0")]; + tensor var_8161_cast_fp16 = mul(x = x_cast_fp16, y = const_139_promoted_to_fp16)[name = string("op_8161_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_8159, interleave = input_interleave_0, values = (x_cast_fp16, var_8161_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_8156_to_fp16 = const()[name = string("op_8156_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_8156_to_fp16, x = input_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor var_8166_split_sizes_0 = const()[name = string("op_8166_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8166_axis_0 = const()[name = string("op_8166_axis_0"), val = int32(-1)]; + tensor var_8166_cast_fp16_0, tensor var_8166_cast_fp16_1 = split(axis = var_8166_axis_0, split_sizes = var_8166_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_8166_cast_fp16")]; + tensor layers_11_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585965248)))]; + tensor hidden_states_119_cast_fp16 = mul(x = var_8166_cast_fp16_0, y = layers_11_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = tensor([0x1.0ap-1])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_8176_cast_fp16")]; + } -> (hidden_states_out, 
K_sliding_out, V_sliding_out, K_full_out, V_full_out, per_layer_combined_out); +} \ No newline at end of file diff --git a/chunk1.mlmodelc/weights/weight.bin b/chunk1.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..53ec3a0699a82617697453305636b207603c4050 --- /dev/null +++ b/chunk1.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c868123b2e7b2182d97a0aaca1d4e33f861ee446eac5b038bdb9f0e2c6e787 +size 585970432 diff --git a/chunk2.mlmodelc/analytics/coremldata.bin b/chunk2.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..61743beff7f654283c262d418dd79a7213652857 --- /dev/null +++ b/chunk2.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c032a454d2eaeea9fd5bfdfe3a2caf53e1a79d401fe8007c986abcc44469a19 +size 243 diff --git a/chunk2.mlmodelc/coremldata.bin b/chunk2.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..695ba28f5c68f164a8d779ebf6f31049bcc05106 --- /dev/null +++ b/chunk2.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8046d35bb019573a7541c830b6b712d5176c199f51e2b1fec3a17342d86a3ac6 +size 1471 diff --git a/chunk2.mlmodelc/model.mil b/chunk2.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..fd3317ef9827b6ddda940f06043caaa8a2823b40 --- /dev/null +++ b/chunk2.mlmodelc/model.mil @@ -0,0 +1,8361 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func decode_q1(tensor K_full_in, tensor K_sliding_in, tensor V_full_in, tensor V_sliding_in, tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor per_layer_combined, tensor sin_f, tensor sin_s, tensor update_mask) { + tensor layers_0_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3279680))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; + tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3280256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3935680))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; + tensor layers_0_self_attn_k_norm_weight = const()[name = string("layers_0_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3936256)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3936832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17044096))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17054400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30161664))))[name = 
string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30171968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43279232))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43281856)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43287040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43614784))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43615104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46236608))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_self_attn_q_norm_weight = const()[name = string("layers_1_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46238720)))]; + tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46239296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46894720))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; + tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(46895296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47550720))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; + tensor layers_1_self_attn_k_norm_weight = const()[name = string("layers_1_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47551296)))]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47551872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60659136))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60669440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73776704))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86894272))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86896896)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86902080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87229824))))[name = 
string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87230144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89851648))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89853760)))]; + tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89854336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90509760))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; + tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90510336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91165760))))[name = string("layers_2_self_attn_v_proj_weight_palettized")]; + tensor layers_2_self_attn_k_norm_weight = const()[name = string("layers_2_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91166336)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91166912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104274176))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(104284480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117391744))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117402048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130509312))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130511936)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130844864))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130845184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133466688))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_self_attn_q_norm_weight = const()[name = string("layers_3_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133468800)))]; + tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133469376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134124800))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; 
+ tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134125376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134780800))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; + tensor layers_3_self_attn_k_norm_weight = const()[name = string("layers_3_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134781376)))]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134781952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147889216))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147899520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161006784))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161017088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174124352))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174126976)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174132160))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174459904))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174460224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177081728))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177083840)))]; + tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177084416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177739840))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; + tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177740416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178395840))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; + tensor layers_4_self_attn_k_norm_weight = const()[name = string("layers_4_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178396416)))]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178396992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191504256))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor 
layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191514560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204621824))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204632128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217739392))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217742016)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217747200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218074944))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218075264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223318208))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_self_attn_q_norm_weight = const()[name = string("layers_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223322368)))]; + tensor layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223323456))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224634240))))[name = string("layers_5_self_attn_k_proj_weight_palettized")]; + tensor layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224635328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225946112))))[name = string("layers_5_self_attn_v_proj_weight_palettized")]; + tensor layers_5_self_attn_k_norm_weight = const()[name = string("layers_5_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225947200)))]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225948288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239055552))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239065856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252173120))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252183424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265290688))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265293312)))]; + tensor 
layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265298496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265626240))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265626560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268248064))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268250176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268905600))))[name = string("layers_6_self_attn_k_proj_weight_palettized")]; + tensor layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268906176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269561600))))[name = string("layers_6_self_attn_v_proj_weight_palettized")]; + tensor layers_6_self_attn_k_norm_weight = const()[name = string("layers_6_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269562176)))]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269562752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282670016))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295787584))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295797888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308905152))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308907776)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308912960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309240704))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309241024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311862528))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_self_attn_q_norm_weight = const()[name = string("layers_7_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311864640)))]; + tensor layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311865216))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(312520640))))[name = string("layers_7_self_attn_k_proj_weight_palettized")]; + tensor layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312521216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313176640))))[name = string("layers_7_self_attn_v_proj_weight_palettized")]; + tensor layers_7_self_attn_k_norm_weight = const()[name = string("layers_7_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313177216)))]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313177792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326285056))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326295360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339402624))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339412928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352520192))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352522816)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352528000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352855744))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352856064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355477568))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355479680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356135104))))[name = string("layers_8_self_attn_k_proj_weight_palettized")]; + tensor layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356135680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356791104))))[name = string("layers_8_self_attn_v_proj_weight_palettized")]; + tensor layers_8_self_attn_k_norm_weight = const()[name = string("layers_8_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356791680)))]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356792256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369899520))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(369909824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383017088))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383027392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396134656))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396137280)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396142464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396470208))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + tensor layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396470528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399092032))))[name = string("layers_9_self_attn_q_proj_weight_palettized")]; + tensor layers_9_self_attn_q_norm_weight = const()[name = string("layers_9_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399094144)))]; + tensor layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399094720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399750144))))[name = 
string("layers_9_self_attn_k_proj_weight_palettized")]; + tensor layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399750720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400406144))))[name = string("layers_9_self_attn_v_proj_weight_palettized")]; + tensor layers_9_self_attn_k_norm_weight = const()[name = string("layers_9_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400406720)))]; + tensor layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400407296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413514560))))[name = string("layers_9_mlp_gate_proj_weight_palettized")]; + tensor layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413524864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426632128))))[name = string("layers_9_mlp_up_proj_weight_palettized")]; + tensor layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426642432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439749696))))[name = string("layers_9_mlp_down_proj_weight_palettized")]; + tensor layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439752320)))]; + tensor layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(439757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440085248))))[name = string("layers_9_per_layer_input_gate_weight_palettized")]; + tensor layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440085568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442707072))))[name = string("layers_10_self_attn_q_proj_weight_palettized")]; + tensor layers_10_self_attn_q_norm_weight = const()[name = string("layers_10_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442709184)))]; + tensor layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442709760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443365184))))[name = string("layers_10_self_attn_k_proj_weight_palettized")]; + tensor layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444021184))))[name = string("layers_10_self_attn_v_proj_weight_palettized")]; + tensor layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444021760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457129024))))[name = string("layers_10_mlp_gate_proj_weight_palettized")]; + tensor layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(457139328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470246592))))[name = string("layers_10_mlp_up_proj_weight_palettized")]; + tensor layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470256896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483364160))))[name = string("layers_10_mlp_down_proj_weight_palettized")]; + tensor layers_10_post_feedforward_layernorm_weight = const()[name = string("layers_10_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483366784)))]; + tensor layers_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483371968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483699712))))[name = string("layers_10_per_layer_input_gate_weight_palettized")]; + tensor layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483700032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488942976))))[name = string("layers_11_self_attn_q_proj_weight_palettized")]; + tensor layers_11_self_attn_q_norm_weight = const()[name = string("layers_11_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488947136)))]; + tensor layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488948224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490259008))))[name = 
string("layers_11_self_attn_k_proj_weight_palettized")]; + tensor layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490260096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491570880))))[name = string("layers_11_self_attn_v_proj_weight_palettized")]; + tensor layers_11_self_attn_k_norm_weight = const()[name = string("layers_11_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491571968)))]; + tensor layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491573056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504680320))))[name = string("layers_11_mlp_gate_proj_weight_palettized")]; + tensor layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504690624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517797888))))[name = string("layers_11_mlp_up_proj_weight_palettized")]; + tensor layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517808192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530915456))))[name = string("layers_11_mlp_down_proj_weight_palettized")]; + tensor layers_11_post_feedforward_layernorm_weight = const()[name = string("layers_11_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530918080)))]; + tensor layers_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(530923264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531251008))))[name = string("layers_11_per_layer_input_gate_weight_palettized")]; + tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_736_squeeze_mask_0 = const()[name = string("op_736_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, squeeze_mask = var_736_squeeze_mask_0, x = K_sliding_in)[name = string("op_736_cast_fp16")]; + tensor K_sliding_slot_1_axes_0 = const()[name = string("K_sliding_slot_1_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_1_cast_fp16 = expand_dims(axes = K_sliding_slot_1_axes_0, x = var_736_cast_fp16)[name = string("K_sliding_slot_1_cast_fp16")]; + tensor var_741_begin_0 = const()[name = string("op_741_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_741_end_0 = const()[name = string("op_741_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_741_end_mask_0 = const()[name = string("op_741_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_741_squeeze_mask_0 = const()[name = string("op_741_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_741_cast_fp16 = slice_by_index(begin = var_741_begin_0, end = var_741_end_0, end_mask = var_741_end_mask_0, squeeze_mask = var_741_squeeze_mask_0, x = V_sliding_in)[name = string("op_741_cast_fp16")]; + tensor V_sliding_slot_1_axes_0 = const()[name = string("V_sliding_slot_1_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_1_cast_fp16 = expand_dims(axes = V_sliding_slot_1_axes_0, x = 
var_741_cast_fp16)[name = string("V_sliding_slot_1_cast_fp16")]; + int32 var_748 = const()[name = string("op_748"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_750_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_750_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_748, interleave = input_1_interleave_0, values = (hidden_states, var_750_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_745_to_fp16 = const()[name = string("op_745_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_745_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_755_split_sizes_0 = const()[name = string("op_755_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_755_axis_0 = const()[name = string("op_755_axis_0"), val = int32(-1)]; + tensor var_755_cast_fp16_0, tensor var_755_cast_fp16_1 = split(axis = var_755_axis_0, split_sizes = var_755_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_755_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531251328)))]; + tensor h_1_cast_fp16 = mul(x = var_755_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_761 = const()[name = string("op_761"), val = tensor([0, 2, 1])]; + tensor var_764_axes_0 = const()[name = string("op_764_axes_0"), val = tensor([2])]; + tensor var_762_cast_fp16 = transpose(perm = var_761, x = h_1_cast_fp16)[name = string("transpose_215")]; + tensor 
var_764_cast_fp16 = expand_dims(axes = var_764_axes_0, x = var_762_cast_fp16)[name = string("op_764_cast_fp16")]; + string var_780_pad_type_0 = const()[name = string("op_780_pad_type_0"), val = string("valid")]; + tensor var_780_strides_0 = const()[name = string("op_780_strides_0"), val = tensor([1, 1])]; + tensor var_780_pad_0 = const()[name = string("op_780_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_780_dilations_0 = const()[name = string("op_780_dilations_0"), val = tensor([1, 1])]; + int32 var_780_groups_0 = const()[name = string("op_780_groups_0"), val = int32(1)]; + tensor var_780 = conv(dilations = var_780_dilations_0, groups = var_780_groups_0, pad = var_780_pad_0, pad_type = var_780_pad_type_0, strides = var_780_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_764_cast_fp16)[name = string("op_780")]; + tensor var_785 = const()[name = string("op_785"), val = tensor([1, 8, 256, 1])]; + tensor var_786 = reshape(shape = var_785, x = var_780)[name = string("op_786")]; + tensor var_791 = const()[name = string("op_791"), val = tensor([0, 1, 3, 2])]; + tensor var_801 = const()[name = string("op_801"), val = tensor([1, 8, 256])]; + tensor var_792 = transpose(perm = var_791, x = var_786)[name = string("transpose_214")]; + tensor x_1 = reshape(shape = var_801, x = var_792)[name = string("x_1")]; + int32 var_807 = const()[name = string("op_807"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_809 = mul(x = x_1, y = const_1_promoted)[name = string("op_809")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_807, interleave = input_5_interleave_0, values = (x_1, var_809))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_804_to_fp16 = const()[name = string("op_804_to_fp16"), val = fp16(0x1.1p-20)]; + 
tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_804_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_814_split_sizes_0 = const()[name = string("op_814_split_sizes_0"), val = tensor([256, 256])]; + int32 var_814_axis_0 = const()[name = string("op_814_axis_0"), val = int32(-1)]; + tensor var_814_0, tensor var_814_1 = split(axis = var_814_axis_0, split_sizes = var_814_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_814")]; + tensor var_816 = mul(x = var_814_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_816")]; + tensor var_821 = const()[name = string("op_821"), val = tensor([1, 8, 1, 256])]; + tensor q_3 = reshape(shape = var_821, x = var_816)[name = string("q_3")]; + tensor var_823_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_823_cast_fp16")]; + tensor var_824_split_sizes_0 = const()[name = string("op_824_split_sizes_0"), val = tensor([128, 128])]; + int32 var_824_axis_0 = const()[name = string("op_824_axis_0"), val = int32(-1)]; + tensor var_824_0, tensor var_824_1 = split(axis = var_824_axis_0, split_sizes = var_824_split_sizes_0, x = q_3)[name = string("op_824")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_826 = mul(x = var_824_1, y = const_2_promoted)[name = string("op_826")]; + int32 var_828 = const()[name = string("op_828"), val = int32(-1)]; + bool var_829_interleave_0 = const()[name = string("op_829_interleave_0"), val = bool(false)]; + tensor var_829 = concat(axis = var_828, interleave = var_829_interleave_0, values = (var_826, var_824_0))[name = string("op_829")]; + tensor var_830_cast_fp16 = mul(x = var_829, y = sin_s)[name = string("op_830_cast_fp16")]; + tensor q_7_cast_fp16 = add(x = var_823_cast_fp16, y = var_830_cast_fp16)[name = string("q_7_cast_fp16")]; + string var_843_pad_type_0 = const()[name = string("op_843_pad_type_0"), val = string("valid")]; + tensor var_843_strides_0 = const()[name = 
string("op_843_strides_0"), val = tensor([1, 1])]; + tensor var_843_pad_0 = const()[name = string("op_843_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_843_dilations_0 = const()[name = string("op_843_dilations_0"), val = tensor([1, 1])]; + int32 var_843_groups_0 = const()[name = string("op_843_groups_0"), val = int32(1)]; + tensor var_843 = conv(dilations = var_843_dilations_0, groups = var_843_groups_0, pad = var_843_pad_0, pad_type = var_843_pad_type_0, strides = var_843_strides_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = var_764_cast_fp16)[name = string("op_843")]; + tensor var_848 = const()[name = string("op_848"), val = tensor([1, 2, 256, 1])]; + tensor var_849 = reshape(shape = var_848, x = var_843)[name = string("op_849")]; + tensor var_854 = const()[name = string("op_854"), val = tensor([0, 1, 3, 2])]; + string var_871_pad_type_0 = const()[name = string("op_871_pad_type_0"), val = string("valid")]; + tensor var_871_strides_0 = const()[name = string("op_871_strides_0"), val = tensor([1, 1])]; + tensor var_871_pad_0 = const()[name = string("op_871_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_871_dilations_0 = const()[name = string("op_871_dilations_0"), val = tensor([1, 1])]; + int32 var_871_groups_0 = const()[name = string("op_871_groups_0"), val = int32(1)]; + tensor var_871 = conv(dilations = var_871_dilations_0, groups = var_871_groups_0, pad = var_871_pad_0, pad_type = var_871_pad_type_0, strides = var_871_strides_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = var_764_cast_fp16)[name = string("op_871")]; + tensor var_876 = const()[name = string("op_876"), val = tensor([1, 2, 256, 1])]; + tensor var_877 = reshape(shape = var_876, x = var_871)[name = string("op_877")]; + tensor var_882 = const()[name = string("op_882"), val = tensor([0, 1, 3, 2])]; + tensor var_892 = const()[name = string("op_892"), val = tensor([1, 2, 256])]; + tensor var_855 = transpose(perm = var_854, x = var_849)[name = 
string("transpose_213")]; + tensor x_3 = reshape(shape = var_892, x = var_855)[name = string("x_3")]; + int32 var_898 = const()[name = string("op_898"), val = int32(-1)]; + fp16 const_3_promoted = const()[name = string("const_3_promoted"), val = fp16(-0x1p+0)]; + tensor var_900 = mul(x = x_3, y = const_3_promoted)[name = string("op_900")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_898, interleave = input_7_interleave_0, values = (x_3, var_900))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_895_to_fp16 = const()[name = string("op_895_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_895_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor var_905_split_sizes_0 = const()[name = string("op_905_split_sizes_0"), val = tensor([256, 256])]; + int32 var_905_axis_0 = const()[name = string("op_905_axis_0"), val = int32(-1)]; + tensor var_905_0, tensor var_905_1 = split(axis = var_905_axis_0, split_sizes = var_905_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_905")]; + tensor var_907 = mul(x = var_905_0, y = layers_0_self_attn_k_norm_weight)[name = string("op_907")]; + tensor var_912 = const()[name = string("op_912"), val = tensor([1, 2, 1, 256])]; + tensor q_5 = reshape(shape = var_912, x = var_907)[name = string("q_5")]; + fp16 var_914_promoted = const()[name = string("op_914_promoted"), val = fp16(0x1p+1)]; + tensor var_883 = transpose(perm = var_882, x = var_877)[name = string("transpose_212")]; + tensor var_915 = pow(x = var_883, y = var_914_promoted)[name = string("op_915")]; + tensor var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor([-1])]; + bool var_920_keep_dims_0 = const()[name = string("op_920_keep_dims_0"), val = bool(true)]; + tensor var_920 = reduce_mean(axes = var_920_axes_0, 
keep_dims = var_920_keep_dims_0, x = var_915)[name = string("op_920")]; + fp16 var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_1_cast_fp16 = add(x = var_920, y = var_922_to_fp16)[name = string("mean_sq_1_cast_fp16")]; + fp32 var_924_epsilon_0 = const()[name = string("op_924_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_924_cast_fp16 = rsqrt(epsilon = var_924_epsilon_0, x = mean_sq_1_cast_fp16)[name = string("op_924_cast_fp16")]; + tensor input_11_cast_fp16 = mul(x = var_883, y = var_924_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor var_926_cast_fp16 = mul(x = q_5, y = cos_s)[name = string("op_926_cast_fp16")]; + tensor var_927_split_sizes_0 = const()[name = string("op_927_split_sizes_0"), val = tensor([128, 128])]; + int32 var_927_axis_0 = const()[name = string("op_927_axis_0"), val = int32(-1)]; + tensor var_927_0, tensor var_927_1 = split(axis = var_927_axis_0, split_sizes = var_927_split_sizes_0, x = q_5)[name = string("op_927")]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_929 = mul(x = var_927_1, y = const_4_promoted)[name = string("op_929")]; + int32 var_931 = const()[name = string("op_931"), val = int32(-1)]; + bool var_932_interleave_0 = const()[name = string("op_932_interleave_0"), val = bool(false)]; + tensor var_932 = concat(axis = var_931, interleave = var_932_interleave_0, values = (var_929, var_927_0))[name = string("op_932")]; + tensor var_933_cast_fp16 = mul(x = var_932, y = sin_s)[name = string("op_933_cast_fp16")]; + tensor input_9_cast_fp16 = add(x = var_926_cast_fp16, y = var_933_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; + fp16 const_5_to_fp16 = const()[name = string("const_5_to_fp16"), val = 
fp16(0x0p+0)]; + tensor k_padded_1_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_9_cast_fp16)[name = string("k_padded_1_cast_fp16")]; + tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; + fp16 const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_1_cast_fp16 = pad(constant_val = const_6_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = input_11_cast_fp16)[name = string("v_padded_1_cast_fp16")]; + tensor var_962_begin_0 = const()[name = string("op_962_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_962_end_0 = const()[name = string("op_962_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_962_end_mask_0 = const()[name = string("op_962_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_962_cast_fp16 = slice_by_index(begin = var_962_begin_0, end = var_962_end_0, end_mask = var_962_end_mask_0, x = K_sliding_slot_1_cast_fp16)[name = string("op_962_cast_fp16")]; + int32 var_969 = const()[name = string("op_969"), val = int32(2)]; + bool K_sliding_out_1_interleave_0 = const()[name = string("K_sliding_out_1_interleave_0"), val = bool(false)]; + tensor K_sliding_out_1_cast_fp16 = concat(axis = var_969, interleave = K_sliding_out_1_interleave_0, values = (var_962_cast_fp16, k_padded_1_cast_fp16))[name = string("K_sliding_out_1_cast_fp16")]; + tensor var_985_begin_0 = const()[name = string("op_985_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_985_end_0 = const()[name = string("op_985_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_985_end_mask_0 = const()[name = string("op_985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_985_cast_fp16 = slice_by_index(begin = var_985_begin_0, end = var_985_end_0, end_mask = var_985_end_mask_0, x = 
V_sliding_slot_1_cast_fp16)[name = string("op_985_cast_fp16")]; + int32 var_992 = const()[name = string("op_992"), val = int32(2)]; + bool V_sliding_out_1_interleave_0 = const()[name = string("V_sliding_out_1_interleave_0"), val = bool(false)]; + tensor V_sliding_out_1_cast_fp16 = concat(axis = var_992, interleave = V_sliding_out_1_interleave_0, values = (var_985_cast_fp16, v_padded_1_cast_fp16))[name = string("V_sliding_out_1_cast_fp16")]; + tensor K_for_attn_1_begin_0 = const()[name = string("K_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_1_end_0 = const()[name = string("K_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_1_end_mask_0 = const()[name = string("K_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_1_cast_fp16 = slice_by_index(begin = K_for_attn_1_begin_0, end = K_for_attn_1_end_0, end_mask = K_for_attn_1_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("K_for_attn_1_cast_fp16")]; + tensor V_for_attn_1_begin_0 = const()[name = string("V_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_1_end_0 = const()[name = string("V_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_1_end_mask_0 = const()[name = string("V_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_1_cast_fp16 = slice_by_index(begin = V_for_attn_1_begin_0, end = V_for_attn_1_end_0, end_mask = V_for_attn_1_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("V_for_attn_1_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_for_attn_1_cast_fp16)[name = string("transpose_211")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = 
string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_210")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_for_attn_1_cast_fp16)[name = string("transpose_209")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_208")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, 
-1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_48_cast_fp16 = transpose(perm = transpose_48_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_207")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_7_cast_fp16, y = transpose_48_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_7_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_7_cast_fp16)[name = string("reduce_max_0")]; + tensor var_1033 = sub(x = x_7_cast_fp16, y = reduce_max_0)[name = string("op_1033")]; + tensor var_1039 = exp(x = var_1033)[name = string("op_1039")]; + tensor var_1049_axes_0 = const()[name = string("op_1049_axes_0"), val = tensor([-1])]; + bool var_1049_keep_dims_0 = const()[name = string("op_1049_keep_dims_0"), val = bool(true)]; + tensor var_1049 = reduce_sum(axes = var_1049_axes_0, keep_dims = var_1049_keep_dims_0, x = var_1039)[name = string("op_1049")]; + tensor var_1055_cast_fp16 = real_div(x = var_1039, y = var_1049)[name = string("op_1055_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_206")]; + tensor 
attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_1055_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_1066 = const()[name = string("op_1066"), val = tensor([0, 2, 1, 3])]; + tensor var_1073 = const()[name = string("op_1073"), val = tensor([1, 1, -1])]; + tensor var_1067_cast_fp16 = transpose(perm = var_1066, x = attn_output_1_cast_fp16)[name = string("transpose_205")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_1073, x = var_1067_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_1078 = const()[name = string("op_1078"), val = tensor([0, 2, 1])]; + string var_1094_pad_type_0 = const()[name = string("op_1094_pad_type_0"), val = string("valid")]; + int32 var_1094_groups_0 = const()[name = string("op_1094_groups_0"), val = int32(1)]; + tensor var_1094_strides_0 = const()[name = string("op_1094_strides_0"), val = tensor([1])]; + tensor var_1094_pad_0 = const()[name = string("op_1094_pad_0"), val = tensor([0, 0])]; + tensor var_1094_dilations_0 = const()[name = string("op_1094_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531256512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533878016))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1079_cast_fp16 = transpose(perm = var_1078, x = attn_output_3_cast_fp16)[name = string("transpose_204")]; + tensor var_1094_cast_fp16 = conv(dilations = var_1094_dilations_0, groups = var_1094_groups_0, pad = var_1094_pad_0, pad_type = var_1094_pad_type_0, strides = var_1094_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1079_cast_fp16)[name = string("op_1094_cast_fp16")]; + tensor var_1098 = const()[name = string("op_1098"), 
val = tensor([0, 2, 1])]; + int32 var_1104 = const()[name = string("op_1104"), val = int32(-1)]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_11_cast_fp16 = transpose(perm = var_1098, x = var_1094_cast_fp16)[name = string("transpose_203")]; + tensor var_1106_cast_fp16 = mul(x = x_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_1106_cast_fp16")]; + bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)]; + tensor input_15_cast_fp16 = concat(axis = var_1104, interleave = input_15_interleave_0, values = (x_11_cast_fp16, var_1106_cast_fp16))[name = string("input_15_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_1101_to_fp16, x = input_15_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_1111_split_sizes_0 = const()[name = string("op_1111_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1111_axis_0 = const()[name = string("op_1111_axis_0"), val = int32(-1)]; + tensor var_1111_cast_fp16_0, tensor var_1111_cast_fp16_1 = split(axis = var_1111_axis_0, split_sizes = var_1111_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_1111_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533880640)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_1111_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_13_cast_fp16")]; + int32 var_1120 = 
const()[name = string("op_1120"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1122_cast_fp16 = mul(x = x_13_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_1122_cast_fp16")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17_cast_fp16 = concat(axis = var_1120, interleave = input_17_interleave_0, values = (x_13_cast_fp16, var_1122_cast_fp16))[name = string("input_17_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_1117_to_fp16 = const()[name = string("op_1117_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1117_to_fp16, x = input_17_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor var_1127_split_sizes_0 = const()[name = string("op_1127_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1127_axis_0 = const()[name = string("op_1127_axis_0"), val = int32(-1)]; + tensor var_1127_cast_fp16_0, tensor var_1127_cast_fp16_1 = split(axis = var_1127_axis_0, split_sizes = var_1127_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1127_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533885824)))]; + tensor h_3_cast_fp16 = mul(x = var_1127_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_1138 = const()[name = string("op_1138"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_1139 = transpose(perm = var_1138, x = h_3_cast_fp16)[name = string("transpose_202")]; + tensor input_19 = expand_dims(axes = 
input_19_axes_0, x = var_1139)[name = string("input_19")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_19)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_19)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_21 = mul(x = gate_3, y = up_1)[name = string("input_21")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_21)[name = string("mlp_out_1")]; + tensor var_1179_axes_0 = const()[name = string("op_1179_axes_0"), val = tensor([2])]; + tensor var_1179 = squeeze(axes = var_1179_axes_0, x = mlp_out_1)[name = string("op_1179")]; + tensor var_1183 = const()[name = string("op_1183"), val = tensor([0, 2, 1])]; + int32 var_1189 = const()[name = string("op_1189"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor x_15 = transpose(perm = var_1183, x = var_1179)[name = string("transpose_201")]; + tensor var_1191 = mul(x = x_15, y = const_9_promoted)[name = string("op_1191")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23 = concat(axis = var_1189, interleave = input_23_interleave_0, values = (x_15, var_1191))[name = string("input_23")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_1186_to_fp16 = const()[name = string("op_1186_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_1186_to_fp16, x = input_23)[name = string("normed_21_cast_fp16")]; + tensor var_1196_split_sizes_0 = const()[name = string("op_1196_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1196_axis_0 = const()[name = string("op_1196_axis_0"), val = int32(-1)]; + tensor var_1196_0, tensor var_1196_1 = split(axis = var_1196_axis_0, split_sizes = var_1196_split_sizes_0, x = normed_21_cast_fp16)[name = 
string("op_1196")]; + tensor hidden_states_3 = mul(x = var_1196_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_13_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 3072])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 3328])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_1224 = const()[name = string("op_1224"), val = tensor([0, 2, 1])]; + tensor input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor([2])]; + tensor var_1225 = transpose(perm = var_1224, x = hidden_states_5_cast_fp16)[name = string("transpose_200")]; + tensor input_25 = expand_dims(axes = input_25_axes_0, x = var_1225)[name = string("input_25")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_25)[name = string("gated_1")]; + string 
gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_1244 = const()[name = string("op_1244"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_1245_cast_fp16 = transpose(perm = var_1244, x = per_layer_slice_1_cast_fp16)[name = string("transpose_199")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_1245_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_27_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_27_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533891008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534218752))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_27_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_1261_axes_0 = 
const()[name = string("op_1261_axes_0"), val = tensor([2])]; + tensor var_1261_cast_fp16 = squeeze(axes = var_1261_axes_0, x = gated_5_cast_fp16)[name = string("op_1261_cast_fp16")]; + tensor var_1265 = const()[name = string("op_1265"), val = tensor([0, 2, 1])]; + int32 var_1271 = const()[name = string("op_1271"), val = int32(-1)]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_17_cast_fp16 = transpose(perm = var_1265, x = var_1261_cast_fp16)[name = string("transpose_198")]; + tensor var_1273_cast_fp16 = mul(x = x_17_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1273_cast_fp16")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29_cast_fp16 = concat(axis = var_1271, interleave = input_29_interleave_0, values = (x_17_cast_fp16, var_1273_cast_fp16))[name = string("input_29_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_1268_to_fp16 = const()[name = string("op_1268_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1268_to_fp16, x = input_29_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_1278_split_sizes_0 = const()[name = string("op_1278_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1278_axis_0 = const()[name = string("op_1278_axis_0"), val = int32(-1)]; + tensor var_1278_cast_fp16_0, tensor var_1278_cast_fp16_1 = split(axis = var_1278_axis_0, split_sizes = var_1278_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1278_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534221376)))]; + tensor hidden_states_9_cast_fp16 = mul(x = 
var_1278_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = tensor([0x1.7ep-1])]; + tensor x_19_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_11_promoted_to_fp16)[name = string("x_19_cast_fp16")]; + tensor var_1290_axes_0 = const()[name = string("op_1290_axes_0"), val = tensor([0])]; + tensor var_1290_cast_fp16 = squeeze(axes = var_1290_axes_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1290_cast_fp16")]; + tensor var_1292_axes_0 = const()[name = string("op_1292_axes_0"), val = tensor([0])]; + tensor var_1292_cast_fp16 = squeeze(axes = var_1292_axes_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1292_cast_fp16")]; + tensor var_1295_begin_0 = const()[name = string("op_1295_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1295_end_0 = const()[name = string("op_1295_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1295_end_mask_0 = const()[name = string("op_1295_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1295_squeeze_mask_0 = const()[name = string("op_1295_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1295_cast_fp16 = slice_by_index(begin = var_1295_begin_0, end = var_1295_end_0, end_mask = var_1295_end_mask_0, squeeze_mask = var_1295_squeeze_mask_0, x = K_sliding_in)[name = string("op_1295_cast_fp16")]; + tensor K_sliding_slot_3_axes_0 = const()[name = string("K_sliding_slot_3_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_3_cast_fp16 = expand_dims(axes = K_sliding_slot_3_axes_0, x = var_1295_cast_fp16)[name = string("K_sliding_slot_3_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = string("op_1300_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor 
var_1300_end_0 = const()[name = string("op_1300_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1300_end_mask_0 = const()[name = string("op_1300_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1300_squeeze_mask_0 = const()[name = string("op_1300_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, squeeze_mask = var_1300_squeeze_mask_0, x = V_sliding_in)[name = string("op_1300_cast_fp16")]; + tensor V_sliding_slot_3_axes_0 = const()[name = string("V_sliding_slot_3_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_3_cast_fp16 = expand_dims(axes = V_sliding_slot_3_axes_0, x = var_1300_cast_fp16)[name = string("V_sliding_slot_3_cast_fp16")]; + int32 var_1307 = const()[name = string("op_1307"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1309_cast_fp16 = mul(x = x_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1309_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_1307, interleave = input_31_interleave_0, values = (x_19_cast_fp16, var_1309_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_1304_to_fp16 = const()[name = string("op_1304_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1304_to_fp16, x = input_31_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor var_1314_split_sizes_0 = const()[name = string("op_1314_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1314_axis_0 = const()[name = string("op_1314_axis_0"), val = int32(-1)]; + tensor var_1314_cast_fp16_0, tensor var_1314_cast_fp16_1 = split(axis 
= var_1314_axis_0, split_sizes = var_1314_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1314_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534226560)))]; + tensor h_7_cast_fp16 = mul(x = var_1314_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_1320 = const()[name = string("op_1320"), val = tensor([0, 2, 1])]; + tensor var_1323_axes_0 = const()[name = string("op_1323_axes_0"), val = tensor([2])]; + tensor var_1321_cast_fp16 = transpose(perm = var_1320, x = h_7_cast_fp16)[name = string("transpose_197")]; + tensor var_1323_cast_fp16 = expand_dims(axes = var_1323_axes_0, x = var_1321_cast_fp16)[name = string("op_1323_cast_fp16")]; + string var_1339_pad_type_0 = const()[name = string("op_1339_pad_type_0"), val = string("valid")]; + tensor var_1339_strides_0 = const()[name = string("op_1339_strides_0"), val = tensor([1, 1])]; + tensor var_1339_pad_0 = const()[name = string("op_1339_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1339_dilations_0 = const()[name = string("op_1339_dilations_0"), val = tensor([1, 1])]; + int32 var_1339_groups_0 = const()[name = string("op_1339_groups_0"), val = int32(1)]; + tensor var_1339 = conv(dilations = var_1339_dilations_0, groups = var_1339_groups_0, pad = var_1339_pad_0, pad_type = var_1339_pad_type_0, strides = var_1339_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_1323_cast_fp16)[name = string("op_1339")]; + tensor var_1344 = const()[name = string("op_1344"), val = tensor([1, 8, 256, 1])]; + tensor var_1345 = reshape(shape = var_1344, x = var_1339)[name = string("op_1345")]; + tensor var_1350 = const()[name = string("op_1350"), val = tensor([0, 1, 3, 2])]; + tensor var_1360 = const()[name = string("op_1360"), val = tensor([1, 8, 256])]; + 
tensor var_1351 = transpose(perm = var_1350, x = var_1345)[name = string("transpose_196")]; + tensor x_21 = reshape(shape = var_1360, x = var_1351)[name = string("x_21")]; + int32 var_1366 = const()[name = string("op_1366"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor var_1368 = mul(x = x_21, y = const_13_promoted)[name = string("op_1368")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35 = concat(axis = var_1366, interleave = input_35_interleave_0, values = (x_21, var_1368))[name = string("input_35")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1363_to_fp16, x = input_35)[name = string("normed_33_cast_fp16")]; + tensor var_1373_split_sizes_0 = const()[name = string("op_1373_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1373_axis_0 = const()[name = string("op_1373_axis_0"), val = int32(-1)]; + tensor var_1373_0, tensor var_1373_1 = split(axis = var_1373_axis_0, split_sizes = var_1373_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1373")]; + tensor var_1375 = mul(x = var_1373_0, y = layers_1_self_attn_q_norm_weight)[name = string("op_1375")]; + tensor var_1380 = const()[name = string("op_1380"), val = tensor([1, 8, 1, 256])]; + tensor q_11 = reshape(shape = var_1380, x = var_1375)[name = string("q_11")]; + tensor var_1382_cast_fp16 = mul(x = q_11, y = cos_s)[name = string("op_1382_cast_fp16")]; + tensor var_1383_split_sizes_0 = const()[name = string("op_1383_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1383_axis_0 = const()[name = string("op_1383_axis_0"), val = int32(-1)]; + tensor var_1383_0, tensor var_1383_1 = split(axis = var_1383_axis_0, split_sizes = 
var_1383_split_sizes_0, x = q_11)[name = string("op_1383")]; + fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; + tensor var_1385 = mul(x = var_1383_1, y = const_14_promoted)[name = string("op_1385")]; + int32 var_1387 = const()[name = string("op_1387"), val = int32(-1)]; + bool var_1388_interleave_0 = const()[name = string("op_1388_interleave_0"), val = bool(false)]; + tensor var_1388 = concat(axis = var_1387, interleave = var_1388_interleave_0, values = (var_1385, var_1383_0))[name = string("op_1388")]; + tensor var_1389_cast_fp16 = mul(x = var_1388, y = sin_s)[name = string("op_1389_cast_fp16")]; + tensor q_15_cast_fp16 = add(x = var_1382_cast_fp16, y = var_1389_cast_fp16)[name = string("q_15_cast_fp16")]; + string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")]; + tensor var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor([1, 1])]; + tensor var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor([1, 1])]; + int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)]; + tensor var_1402 = conv(dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_1_self_attn_k_proj_weight_palettized, x = var_1323_cast_fp16)[name = string("op_1402")]; + tensor var_1407 = const()[name = string("op_1407"), val = tensor([1, 2, 256, 1])]; + tensor var_1408 = reshape(shape = var_1407, x = var_1402)[name = string("op_1408")]; + tensor var_1413 = const()[name = string("op_1413"), val = tensor([0, 1, 3, 2])]; + string var_1430_pad_type_0 = const()[name = string("op_1430_pad_type_0"), val = string("valid")]; + tensor var_1430_strides_0 = const()[name = string("op_1430_strides_0"), val = tensor([1, 1])]; + tensor var_1430_pad_0 = 
const()[name = string("op_1430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1430_dilations_0 = const()[name = string("op_1430_dilations_0"), val = tensor([1, 1])]; + int32 var_1430_groups_0 = const()[name = string("op_1430_groups_0"), val = int32(1)]; + tensor var_1430 = conv(dilations = var_1430_dilations_0, groups = var_1430_groups_0, pad = var_1430_pad_0, pad_type = var_1430_pad_type_0, strides = var_1430_strides_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = var_1323_cast_fp16)[name = string("op_1430")]; + tensor var_1435 = const()[name = string("op_1435"), val = tensor([1, 2, 256, 1])]; + tensor var_1436 = reshape(shape = var_1435, x = var_1430)[name = string("op_1436")]; + tensor var_1441 = const()[name = string("op_1441"), val = tensor([0, 1, 3, 2])]; + tensor var_1451 = const()[name = string("op_1451"), val = tensor([1, 2, 256])]; + tensor var_1414 = transpose(perm = var_1413, x = var_1408)[name = string("transpose_195")]; + tensor x_23 = reshape(shape = var_1451, x = var_1414)[name = string("x_23")]; + int32 var_1457 = const()[name = string("op_1457"), val = int32(-1)]; + fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; + tensor var_1459 = mul(x = x_23, y = const_15_promoted)[name = string("op_1459")]; + bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)]; + tensor input_37 = concat(axis = var_1457, interleave = input_37_interleave_0, values = (x_23, var_1459))[name = string("input_37")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1454_to_fp16 = const()[name = string("op_1454_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1454_to_fp16, x = input_37)[name = string("normed_37_cast_fp16")]; + tensor var_1464_split_sizes_0 = const()[name = string("op_1464_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1464_axis_0 = 
const()[name = string("op_1464_axis_0"), val = int32(-1)]; + tensor var_1464_0, tensor var_1464_1 = split(axis = var_1464_axis_0, split_sizes = var_1464_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1464")]; + tensor var_1466 = mul(x = var_1464_0, y = layers_1_self_attn_k_norm_weight)[name = string("op_1466")]; + tensor var_1471 = const()[name = string("op_1471"), val = tensor([1, 2, 1, 256])]; + tensor q_13 = reshape(shape = var_1471, x = var_1466)[name = string("q_13")]; + fp16 var_1473_promoted = const()[name = string("op_1473_promoted"), val = fp16(0x1p+1)]; + tensor var_1442 = transpose(perm = var_1441, x = var_1436)[name = string("transpose_194")]; + tensor var_1474 = pow(x = var_1442, y = var_1473_promoted)[name = string("op_1474")]; + tensor var_1479_axes_0 = const()[name = string("op_1479_axes_0"), val = tensor([-1])]; + bool var_1479_keep_dims_0 = const()[name = string("op_1479_keep_dims_0"), val = bool(true)]; + tensor var_1479 = reduce_mean(axes = var_1479_axes_0, keep_dims = var_1479_keep_dims_0, x = var_1474)[name = string("op_1479")]; + fp16 var_1481_to_fp16 = const()[name = string("op_1481_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_3_cast_fp16 = add(x = var_1479, y = var_1481_to_fp16)[name = string("mean_sq_3_cast_fp16")]; + fp32 var_1483_epsilon_0 = const()[name = string("op_1483_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1483_cast_fp16 = rsqrt(epsilon = var_1483_epsilon_0, x = mean_sq_3_cast_fp16)[name = string("op_1483_cast_fp16")]; + tensor input_41_cast_fp16 = mul(x = var_1442, y = var_1483_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor var_1485_cast_fp16 = mul(x = q_13, y = cos_s)[name = string("op_1485_cast_fp16")]; + tensor var_1486_split_sizes_0 = const()[name = string("op_1486_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1486_axis_0 = const()[name = string("op_1486_axis_0"), val = int32(-1)]; + tensor var_1486_0, tensor var_1486_1 = split(axis = var_1486_axis_0, split_sizes = 
var_1486_split_sizes_0, x = q_13)[name = string("op_1486")]; + fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; + tensor var_1488 = mul(x = var_1486_1, y = const_16_promoted)[name = string("op_1488")]; + int32 var_1490 = const()[name = string("op_1490"), val = int32(-1)]; + bool var_1491_interleave_0 = const()[name = string("op_1491_interleave_0"), val = bool(false)]; + tensor var_1491 = concat(axis = var_1490, interleave = var_1491_interleave_0, values = (var_1488, var_1486_0))[name = string("op_1491")]; + tensor var_1492_cast_fp16 = mul(x = var_1491, y = sin_s)[name = string("op_1492_cast_fp16")]; + tensor input_39_cast_fp16 = add(x = var_1485_cast_fp16, y = var_1492_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor k_padded_3_pad_0 = const()[name = string("k_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; + fp16 const_17_to_fp16 = const()[name = string("const_17_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_3_cast_fp16 = pad(constant_val = const_17_to_fp16, mode = k_padded_3_mode_0, pad = k_padded_3_pad_0, x = input_39_cast_fp16)[name = string("k_padded_3_cast_fp16")]; + tensor v_padded_3_pad_0 = const()[name = string("v_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; + fp16 const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_3_cast_fp16 = pad(constant_val = const_18_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = input_41_cast_fp16)[name = string("v_padded_3_cast_fp16")]; + tensor var_1521_begin_0 = const()[name = string("op_1521_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1521_end_0 = const()[name = string("op_1521_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1521_end_mask_0 = const()[name = 
string("op_1521_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1521_cast_fp16 = slice_by_index(begin = var_1521_begin_0, end = var_1521_end_0, end_mask = var_1521_end_mask_0, x = K_sliding_slot_3_cast_fp16)[name = string("op_1521_cast_fp16")]; + int32 var_1528 = const()[name = string("op_1528"), val = int32(2)]; + bool K_sliding_out_3_interleave_0 = const()[name = string("K_sliding_out_3_interleave_0"), val = bool(false)]; + tensor K_sliding_out_3_cast_fp16 = concat(axis = var_1528, interleave = K_sliding_out_3_interleave_0, values = (var_1521_cast_fp16, k_padded_3_cast_fp16))[name = string("K_sliding_out_3_cast_fp16")]; + tensor var_1544_begin_0 = const()[name = string("op_1544_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1544_end_0 = const()[name = string("op_1544_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1544_end_mask_0 = const()[name = string("op_1544_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1544_cast_fp16 = slice_by_index(begin = var_1544_begin_0, end = var_1544_end_0, end_mask = var_1544_end_mask_0, x = V_sliding_slot_3_cast_fp16)[name = string("op_1544_cast_fp16")]; + int32 var_1551 = const()[name = string("op_1551"), val = int32(2)]; + bool V_sliding_out_3_interleave_0 = const()[name = string("V_sliding_out_3_interleave_0"), val = bool(false)]; + tensor V_sliding_out_3_cast_fp16 = concat(axis = var_1551, interleave = V_sliding_out_3_interleave_0, values = (var_1544_cast_fp16, v_padded_3_cast_fp16))[name = string("V_sliding_out_3_cast_fp16")]; + tensor K_for_attn_3_begin_0 = const()[name = string("K_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_3_end_0 = const()[name = string("K_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_3_end_mask_0 = const()[name = string("K_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_3_cast_fp16 = slice_by_index(begin = K_for_attn_3_begin_0, end = K_for_attn_3_end_0, 
end_mask = K_for_attn_3_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("K_for_attn_3_cast_fp16")]; + tensor V_for_attn_3_begin_0 = const()[name = string("V_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_3_end_0 = const()[name = string("V_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_3_end_mask_0 = const()[name = string("V_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_3_cast_fp16 = slice_by_index(begin = V_for_attn_3_begin_0, end = V_for_attn_3_end_0, end_mask = V_for_attn_3_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("V_for_attn_3_cast_fp16")]; + tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_for_attn_3_cast_fp16)[name = string("transpose_193")]; + tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_4, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; + tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_192")]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_5, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor 
tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_for_attn_3_cast_fp16)[name = string("transpose_191")]; + tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_6, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_190")]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_7, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; + tensor V_expanded_3_perm_0 = const()[name = string("V_expanded_3_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_189")]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_15_cast_fp16, y = transpose_49_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_27_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + 
tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_27_cast_fp16)[name = string("reduce_max_1")]; + tensor var_1592 = sub(x = x_27_cast_fp16, y = reduce_max_1)[name = string("op_1592")]; + tensor var_1598 = exp(x = var_1592)[name = string("op_1598")]; + tensor var_1608_axes_0 = const()[name = string("op_1608_axes_0"), val = tensor([-1])]; + bool var_1608_keep_dims_0 = const()[name = string("op_1608_keep_dims_0"), val = bool(true)]; + tensor var_1608 = reduce_sum(axes = var_1608_axes_0, keep_dims = var_1608_keep_dims_0, x = var_1598)[name = string("op_1608")]; + tensor var_1614_cast_fp16 = real_div(x = var_1598, y = var_1608)[name = string("op_1614_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor V_expanded_3_cast_fp16 = transpose(perm = V_expanded_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_188")]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_1614_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_1625 = const()[name = string("op_1625"), val = tensor([0, 2, 1, 3])]; + tensor var_1632 = const()[name = string("op_1632"), val = tensor([1, 1, -1])]; + tensor var_1626_cast_fp16 = transpose(perm = var_1625, x = attn_output_7_cast_fp16)[name = string("transpose_187")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_1632, x = var_1626_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_1637 = const()[name = string("op_1637"), val = tensor([0, 2, 1])]; + string var_1653_pad_type_0 = const()[name = string("op_1653_pad_type_0"), val = string("valid")]; + int32 var_1653_groups_0 = const()[name = string("op_1653_groups_0"), val = int32(1)]; + tensor 
var_1653_strides_0 = const()[name = string("op_1653_strides_0"), val = tensor([1])]; + tensor var_1653_pad_0 = const()[name = string("op_1653_pad_0"), val = tensor([0, 0])]; + tensor var_1653_dilations_0 = const()[name = string("op_1653_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534231744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536853248))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1638_cast_fp16 = transpose(perm = var_1637, x = attn_output_9_cast_fp16)[name = string("transpose_186")]; + tensor var_1653_cast_fp16 = conv(dilations = var_1653_dilations_0, groups = var_1653_groups_0, pad = var_1653_pad_0, pad_type = var_1653_pad_type_0, strides = var_1653_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1638_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor var_1657 = const()[name = string("op_1657"), val = tensor([0, 2, 1])]; + int32 var_1663 = const()[name = string("op_1663"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_31_cast_fp16 = transpose(perm = var_1657, x = var_1653_cast_fp16)[name = string("transpose_185")]; + tensor var_1665_cast_fp16 = mul(x = x_31_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1665_cast_fp16")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45_cast_fp16 = concat(axis = var_1663, interleave = input_45_interleave_0, values = (x_31_cast_fp16, var_1665_cast_fp16))[name = string("input_45_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1660_to_fp16 = const()[name = string("op_1660_to_fp16"), val = fp16(0x1.1p-20)]; + 
tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1660_to_fp16, x = input_45_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor var_1670_split_sizes_0 = const()[name = string("op_1670_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1670_axis_0 = const()[name = string("op_1670_axis_0"), val = int32(-1)]; + tensor var_1670_cast_fp16_0, tensor var_1670_cast_fp16_1 = split(axis = var_1670_axis_0, split_sizes = var_1670_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1670_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536855872)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1670_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_19_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_33_cast_fp16")]; + int32 var_1679 = const()[name = string("op_1679"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1681_cast_fp16 = mul(x = x_33_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1681_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_1679, interleave = input_47_interleave_0, values = (x_33_cast_fp16, var_1681_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1676_to_fp16 = const()[name = string("op_1676_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1676_to_fp16, x = input_47_cast_fp16)[name = 
string("normed_45_cast_fp16")]; + tensor var_1686_split_sizes_0 = const()[name = string("op_1686_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1686_axis_0 = const()[name = string("op_1686_axis_0"), val = int32(-1)]; + tensor var_1686_cast_fp16_0, tensor var_1686_cast_fp16_1 = split(axis = var_1686_axis_0, split_sizes = var_1686_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1686_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536861056)))]; + tensor h_9_cast_fp16 = mul(x = var_1686_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1697 = const()[name = string("op_1697"), val = tensor([0, 2, 1])]; + tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; + tensor var_1698 = transpose(perm = var_1697, x = h_9_cast_fp16)[name = string("transpose_184")]; + tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_1698)[name = string("input_49")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_49)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = 
string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_49)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_51 = mul(x = gate_7, y = up_3)[name = string("input_51")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_51)[name = string("mlp_out_3")]; + tensor var_1738_axes_0 = const()[name = string("op_1738_axes_0"), val = tensor([2])]; + tensor var_1738 = squeeze(axes = var_1738_axes_0, x = mlp_out_3)[name = string("op_1738")]; + tensor var_1742 = const()[name = string("op_1742"), val = tensor([0, 2, 1])]; + int32 var_1748 = const()[name = string("op_1748"), val = int32(-1)]; + fp16 const_21_promoted = 
const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_35 = transpose(perm = var_1742, x = var_1738)[name = string("transpose_183")]; + tensor var_1750 = mul(x = x_35, y = const_21_promoted)[name = string("op_1750")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1748, interleave = input_53_interleave_0, values = (x_35, var_1750))[name = string("input_53")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1745_to_fp16 = const()[name = string("op_1745_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1745_to_fp16, x = input_53)[name = string("normed_49_cast_fp16")]; + tensor var_1755_split_sizes_0 = const()[name = string("op_1755_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1755_axis_0 = const()[name = string("op_1755_axis_0"), val = int32(-1)]; + tensor var_1755_0, tensor var_1755_1 = split(axis = var_1755_axis_0, split_sizes = var_1755_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1755")]; + tensor hidden_states_13 = mul(x = var_1755_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_33_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 3328])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 3584])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = 
string("per_layer_slice_3_cast_fp16")]; + tensor var_1783 = const()[name = string("op_1783"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_1784 = transpose(perm = var_1783, x = hidden_states_15_cast_fp16)[name = string("transpose_182")]; + tensor input_55 = expand_dims(axes = input_55_axes_0, x = var_1784)[name = string("input_55")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_55)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1803 = const()[name = string("op_1803"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1804_cast_fp16 = transpose(perm = var_1803, x = per_layer_slice_3_cast_fp16)[name = string("transpose_181")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1804_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_57_cast_fp16")]; + string gated_11_pad_type_0 = 
const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536866240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537193984))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1820_axes_0 = const()[name = string("op_1820_axes_0"), val = tensor([2])]; + tensor var_1820_cast_fp16 = squeeze(axes = var_1820_axes_0, x = gated_11_cast_fp16)[name = string("op_1820_cast_fp16")]; + tensor var_1824 = const()[name = string("op_1824"), val = tensor([0, 2, 1])]; + int32 var_1830 = const()[name = string("op_1830"), val = int32(-1)]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_37_cast_fp16 = transpose(perm = var_1824, x = var_1820_cast_fp16)[name = string("transpose_180")]; + tensor var_1832_cast_fp16 = mul(x = x_37_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1832_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = 
concat(axis = var_1830, interleave = input_59_interleave_0, values = (x_37_cast_fp16, var_1832_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1827_to_fp16, x = input_59_cast_fp16)[name = string("normed_53_cast_fp16")]; + tensor var_1837_split_sizes_0 = const()[name = string("op_1837_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1837_axis_0 = const()[name = string("op_1837_axis_0"), val = int32(-1)]; + tensor var_1837_cast_fp16_0, tensor var_1837_cast_fp16_1 = split(axis = var_1837_axis_0, split_sizes = var_1837_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1837_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537196608)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1837_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_39_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_39_cast_fp16")]; + tensor var_1849_axes_0 = const()[name = string("op_1849_axes_0"), val = tensor([0])]; + tensor var_1849_cast_fp16 = squeeze(axes = var_1849_axes_0, x = K_sliding_out_3_cast_fp16)[name = string("op_1849_cast_fp16")]; + tensor var_1851_axes_0 = const()[name = string("op_1851_axes_0"), val = 
tensor([0])]; + tensor var_1851_cast_fp16 = squeeze(axes = var_1851_axes_0, x = V_sliding_out_3_cast_fp16)[name = string("op_1851_cast_fp16")]; + tensor var_1854_begin_0 = const()[name = string("op_1854_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1854_end_0 = const()[name = string("op_1854_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_1854_end_mask_0 = const()[name = string("op_1854_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1854_squeeze_mask_0 = const()[name = string("op_1854_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1854_cast_fp16 = slice_by_index(begin = var_1854_begin_0, end = var_1854_end_0, end_mask = var_1854_end_mask_0, squeeze_mask = var_1854_squeeze_mask_0, x = K_sliding_in)[name = string("op_1854_cast_fp16")]; + tensor K_sliding_slot_5_axes_0 = const()[name = string("K_sliding_slot_5_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_5_cast_fp16 = expand_dims(axes = K_sliding_slot_5_axes_0, x = var_1854_cast_fp16)[name = string("K_sliding_slot_5_cast_fp16")]; + tensor var_1859_begin_0 = const()[name = string("op_1859_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1859_end_0 = const()[name = string("op_1859_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_1859_end_mask_0 = const()[name = string("op_1859_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1859_squeeze_mask_0 = const()[name = string("op_1859_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1859_cast_fp16 = slice_by_index(begin = var_1859_begin_0, end = var_1859_end_0, end_mask = var_1859_end_mask_0, squeeze_mask = var_1859_squeeze_mask_0, x = V_sliding_in)[name = string("op_1859_cast_fp16")]; + tensor V_sliding_slot_5_axes_0 = const()[name = string("V_sliding_slot_5_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_5_cast_fp16 = expand_dims(axes = V_sliding_slot_5_axes_0, x = var_1859_cast_fp16)[name = string("V_sliding_slot_5_cast_fp16")]; + int32 
var_1866 = const()[name = string("op_1866"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1868_cast_fp16 = mul(x = x_39_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1868_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_1866, interleave = input_61_interleave_0, values = (x_39_cast_fp16, var_1868_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1863_to_fp16 = const()[name = string("op_1863_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1863_to_fp16, x = input_61_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1873_split_sizes_0 = const()[name = string("op_1873_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1873_axis_0 = const()[name = string("op_1873_axis_0"), val = int32(-1)]; + tensor var_1873_cast_fp16_0, tensor var_1873_cast_fp16_1 = split(axis = var_1873_axis_0, split_sizes = var_1873_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1873_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537201792)))]; + tensor h_13_cast_fp16 = mul(x = var_1873_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1879 = const()[name = string("op_1879"), val = tensor([0, 2, 1])]; + tensor var_1882_axes_0 = const()[name = string("op_1882_axes_0"), val = tensor([2])]; + tensor var_1880_cast_fp16 = transpose(perm = var_1879, x = h_13_cast_fp16)[name = string("transpose_179")]; + tensor var_1882_cast_fp16 = 
expand_dims(axes = var_1882_axes_0, x = var_1880_cast_fp16)[name = string("op_1882_cast_fp16")]; + string var_1898_pad_type_0 = const()[name = string("op_1898_pad_type_0"), val = string("valid")]; + tensor var_1898_strides_0 = const()[name = string("op_1898_strides_0"), val = tensor([1, 1])]; + tensor var_1898_pad_0 = const()[name = string("op_1898_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1898_dilations_0 = const()[name = string("op_1898_dilations_0"), val = tensor([1, 1])]; + int32 var_1898_groups_0 = const()[name = string("op_1898_groups_0"), val = int32(1)]; + tensor var_1898 = conv(dilations = var_1898_dilations_0, groups = var_1898_groups_0, pad = var_1898_pad_0, pad_type = var_1898_pad_type_0, strides = var_1898_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1882_cast_fp16)[name = string("op_1898")]; + tensor var_1903 = const()[name = string("op_1903"), val = tensor([1, 8, 256, 1])]; + tensor var_1904 = reshape(shape = var_1903, x = var_1898)[name = string("op_1904")]; + tensor var_1909 = const()[name = string("op_1909"), val = tensor([0, 1, 3, 2])]; + tensor var_1919 = const()[name = string("op_1919"), val = tensor([1, 8, 256])]; + tensor var_1910 = transpose(perm = var_1909, x = var_1904)[name = string("transpose_178")]; + tensor x_41 = reshape(shape = var_1919, x = var_1910)[name = string("x_41")]; + int32 var_1925 = const()[name = string("op_1925"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_1927 = mul(x = x_41, y = const_25_promoted)[name = string("op_1927")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_1925, interleave = input_65_interleave_0, values = (x_41, var_1927))[name = string("input_65")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1922_to_fp16 = const()[name = 
string("op_1922_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1922_to_fp16, x = input_65)[name = string("normed_61_cast_fp16")]; + tensor var_1932_split_sizes_0 = const()[name = string("op_1932_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1932_axis_0 = const()[name = string("op_1932_axis_0"), val = int32(-1)]; + tensor var_1932_0, tensor var_1932_1 = split(axis = var_1932_axis_0, split_sizes = var_1932_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1932")]; + tensor var_1934 = mul(x = var_1932_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_1934")]; + tensor var_1939 = const()[name = string("op_1939"), val = tensor([1, 8, 1, 256])]; + tensor q_19 = reshape(shape = var_1939, x = var_1934)[name = string("q_19")]; + tensor var_1941_cast_fp16 = mul(x = q_19, y = cos_s)[name = string("op_1941_cast_fp16")]; + tensor var_1942_split_sizes_0 = const()[name = string("op_1942_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1942_axis_0 = const()[name = string("op_1942_axis_0"), val = int32(-1)]; + tensor var_1942_0, tensor var_1942_1 = split(axis = var_1942_axis_0, split_sizes = var_1942_split_sizes_0, x = q_19)[name = string("op_1942")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_1944 = mul(x = var_1942_1, y = const_26_promoted)[name = string("op_1944")]; + int32 var_1946 = const()[name = string("op_1946"), val = int32(-1)]; + bool var_1947_interleave_0 = const()[name = string("op_1947_interleave_0"), val = bool(false)]; + tensor var_1947 = concat(axis = var_1946, interleave = var_1947_interleave_0, values = (var_1944, var_1942_0))[name = string("op_1947")]; + tensor var_1948_cast_fp16 = mul(x = var_1947, y = sin_s)[name = string("op_1948_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_1941_cast_fp16, y = var_1948_cast_fp16)[name = string("q_23_cast_fp16")]; + string var_1961_pad_type_0 = const()[name 
= string("op_1961_pad_type_0"), val = string("valid")]; + tensor var_1961_strides_0 = const()[name = string("op_1961_strides_0"), val = tensor([1, 1])]; + tensor var_1961_pad_0 = const()[name = string("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_dilations_0 = const()[name = string("op_1961_dilations_0"), val = tensor([1, 1])]; + int32 var_1961_groups_0 = const()[name = string("op_1961_groups_0"), val = int32(1)]; + tensor var_1961 = conv(dilations = var_1961_dilations_0, groups = var_1961_groups_0, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1961_strides_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = var_1882_cast_fp16)[name = string("op_1961")]; + tensor var_1966 = const()[name = string("op_1966"), val = tensor([1, 2, 256, 1])]; + tensor var_1967 = reshape(shape = var_1966, x = var_1961)[name = string("op_1967")]; + tensor var_1972 = const()[name = string("op_1972"), val = tensor([0, 1, 3, 2])]; + string var_1989_pad_type_0 = const()[name = string("op_1989_pad_type_0"), val = string("valid")]; + tensor var_1989_strides_0 = const()[name = string("op_1989_strides_0"), val = tensor([1, 1])]; + tensor var_1989_pad_0 = const()[name = string("op_1989_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1989_dilations_0 = const()[name = string("op_1989_dilations_0"), val = tensor([1, 1])]; + int32 var_1989_groups_0 = const()[name = string("op_1989_groups_0"), val = int32(1)]; + tensor var_1989 = conv(dilations = var_1989_dilations_0, groups = var_1989_groups_0, pad = var_1989_pad_0, pad_type = var_1989_pad_type_0, strides = var_1989_strides_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = var_1882_cast_fp16)[name = string("op_1989")]; + tensor var_1994 = const()[name = string("op_1994"), val = tensor([1, 2, 256, 1])]; + tensor var_1995 = reshape(shape = var_1994, x = var_1989)[name = string("op_1995")]; + tensor var_2000 = const()[name = string("op_2000"), val = tensor([0, 1, 3, 2])]; + tensor var_2010 = 
const()[name = string("op_2010"), val = tensor([1, 2, 256])]; + tensor var_1973 = transpose(perm = var_1972, x = var_1967)[name = string("transpose_177")]; + tensor x_43 = reshape(shape = var_2010, x = var_1973)[name = string("x_43")]; + int32 var_2016 = const()[name = string("op_2016"), val = int32(-1)]; + fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; + tensor var_2018 = mul(x = x_43, y = const_27_promoted)[name = string("op_2018")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_2016, interleave = input_67_interleave_0, values = (x_43, var_2018))[name = string("input_67")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_2013_to_fp16 = const()[name = string("op_2013_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_2013_to_fp16, x = input_67)[name = string("normed_65_cast_fp16")]; + tensor var_2023_split_sizes_0 = const()[name = string("op_2023_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2023_axis_0 = const()[name = string("op_2023_axis_0"), val = int32(-1)]; + tensor var_2023_0, tensor var_2023_1 = split(axis = var_2023_axis_0, split_sizes = var_2023_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_2023")]; + tensor var_2025 = mul(x = var_2023_0, y = layers_2_self_attn_k_norm_weight)[name = string("op_2025")]; + tensor var_2030 = const()[name = string("op_2030"), val = tensor([1, 2, 1, 256])]; + tensor q_21 = reshape(shape = var_2030, x = var_2025)[name = string("q_21")]; + fp16 var_2032_promoted = const()[name = string("op_2032_promoted"), val = fp16(0x1p+1)]; + tensor var_2001 = transpose(perm = var_2000, x = var_1995)[name = string("transpose_176")]; + tensor var_2033 = pow(x = var_2001, y = var_2032_promoted)[name = string("op_2033")]; + tensor var_2038_axes_0 = const()[name = 
string("op_2038_axes_0"), val = tensor([-1])]; + bool var_2038_keep_dims_0 = const()[name = string("op_2038_keep_dims_0"), val = bool(true)]; + tensor var_2038 = reduce_mean(axes = var_2038_axes_0, keep_dims = var_2038_keep_dims_0, x = var_2033)[name = string("op_2038")]; + fp16 var_2040_to_fp16 = const()[name = string("op_2040_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_5_cast_fp16 = add(x = var_2038, y = var_2040_to_fp16)[name = string("mean_sq_5_cast_fp16")]; + fp32 var_2042_epsilon_0 = const()[name = string("op_2042_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2042_cast_fp16 = rsqrt(epsilon = var_2042_epsilon_0, x = mean_sq_5_cast_fp16)[name = string("op_2042_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_2001, y = var_2042_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_2044_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_2044_cast_fp16")]; + tensor var_2045_split_sizes_0 = const()[name = string("op_2045_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2045_axis_0 = const()[name = string("op_2045_axis_0"), val = int32(-1)]; + tensor var_2045_0, tensor var_2045_1 = split(axis = var_2045_axis_0, split_sizes = var_2045_split_sizes_0, x = q_21)[name = string("op_2045")]; + fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; + tensor var_2047 = mul(x = var_2045_1, y = const_28_promoted)[name = string("op_2047")]; + int32 var_2049 = const()[name = string("op_2049"), val = int32(-1)]; + bool var_2050_interleave_0 = const()[name = string("op_2050_interleave_0"), val = bool(false)]; + tensor var_2050 = concat(axis = var_2049, interleave = var_2050_interleave_0, values = (var_2047, var_2045_0))[name = string("op_2050")]; + tensor var_2051_cast_fp16 = mul(x = var_2050, y = sin_s)[name = string("op_2051_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_2044_cast_fp16, y = var_2051_cast_fp16)[name = string("input_69_cast_fp16")]; + tensor k_padded_5_pad_0 = const()[name = 
string("k_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; + fp16 const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_5_cast_fp16 = pad(constant_val = const_29_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_69_cast_fp16)[name = string("k_padded_5_cast_fp16")]; + tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; + fp16 const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_5_cast_fp16 = pad(constant_val = const_30_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = input_71_cast_fp16)[name = string("v_padded_5_cast_fp16")]; + tensor var_2080_begin_0 = const()[name = string("op_2080_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2080_end_0 = const()[name = string("op_2080_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2080_end_mask_0 = const()[name = string("op_2080_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2080_cast_fp16 = slice_by_index(begin = var_2080_begin_0, end = var_2080_end_0, end_mask = var_2080_end_mask_0, x = K_sliding_slot_5_cast_fp16)[name = string("op_2080_cast_fp16")]; + int32 var_2087 = const()[name = string("op_2087"), val = int32(2)]; + bool K_sliding_out_5_interleave_0 = const()[name = string("K_sliding_out_5_interleave_0"), val = bool(false)]; + tensor K_sliding_out_5_cast_fp16 = concat(axis = var_2087, interleave = K_sliding_out_5_interleave_0, values = (var_2080_cast_fp16, k_padded_5_cast_fp16))[name = string("K_sliding_out_5_cast_fp16")]; + tensor var_2103_begin_0 = const()[name = string("op_2103_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2103_end_0 = const()[name = string("op_2103_end_0"), val = 
tensor([1, 2, 512, 512])]; + tensor var_2103_end_mask_0 = const()[name = string("op_2103_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2103_cast_fp16 = slice_by_index(begin = var_2103_begin_0, end = var_2103_end_0, end_mask = var_2103_end_mask_0, x = V_sliding_slot_5_cast_fp16)[name = string("op_2103_cast_fp16")]; + int32 var_2110 = const()[name = string("op_2110"), val = int32(2)]; + bool V_sliding_out_5_interleave_0 = const()[name = string("V_sliding_out_5_interleave_0"), val = bool(false)]; + tensor V_sliding_out_5_cast_fp16 = concat(axis = var_2110, interleave = V_sliding_out_5_interleave_0, values = (var_2103_cast_fp16, v_padded_5_cast_fp16))[name = string("V_sliding_out_5_cast_fp16")]; + tensor K_for_attn_5_begin_0 = const()[name = string("K_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_5_end_0 = const()[name = string("K_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_5_end_mask_0 = const()[name = string("K_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_5_cast_fp16 = slice_by_index(begin = K_for_attn_5_begin_0, end = K_for_attn_5_end_0, end_mask = K_for_attn_5_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("K_for_attn_5_cast_fp16")]; + tensor V_for_attn_5_begin_0 = const()[name = string("V_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_5_end_0 = const()[name = string("V_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_5_end_mask_0 = const()[name = string("V_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_5_cast_fp16 = slice_by_index(begin = V_for_attn_5_begin_0, end = V_for_attn_5_end_0, end_mask = V_for_attn_5_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("V_for_attn_5_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = 
string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_for_attn_5_cast_fp16)[name = string("transpose_175")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_174")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_for_attn_5_cast_fp16)[name = string("transpose_173")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_11_cast_fp16 = transpose(perm = 
transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_172")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_50_cast_fp16 = transpose(perm = transpose_50_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_171")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_23_cast_fp16, y = transpose_50_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_47_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_47_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_47_cast_fp16)[name = string("reduce_max_2")]; + tensor var_2151 = sub(x = x_47_cast_fp16, y = reduce_max_2)[name = string("op_2151")]; + tensor var_2157 = exp(x = var_2151)[name = string("op_2157")]; + tensor var_2167_axes_0 = const()[name = string("op_2167_axes_0"), val = tensor([-1])]; + bool var_2167_keep_dims_0 = const()[name = string("op_2167_keep_dims_0"), val = bool(true)]; + tensor var_2167 = reduce_sum(axes = var_2167_axes_0, keep_dims = var_2167_keep_dims_0, x = var_2157)[name = string("op_2167")]; + tensor var_2173_cast_fp16 = real_div(x = var_2157, y = var_2167)[name = string("op_2173_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = 
string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_170")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_2173_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_2184 = const()[name = string("op_2184"), val = tensor([0, 2, 1, 3])]; + tensor var_2191 = const()[name = string("op_2191"), val = tensor([1, 1, -1])]; + tensor var_2185_cast_fp16 = transpose(perm = var_2184, x = attn_output_13_cast_fp16)[name = string("transpose_169")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2191, x = var_2185_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2196 = const()[name = string("op_2196"), val = tensor([0, 2, 1])]; + string var_2212_pad_type_0 = const()[name = string("op_2212_pad_type_0"), val = string("valid")]; + int32 var_2212_groups_0 = const()[name = string("op_2212_groups_0"), val = int32(1)]; + tensor var_2212_strides_0 = const()[name = string("op_2212_strides_0"), val = tensor([1])]; + tensor var_2212_pad_0 = const()[name = string("op_2212_pad_0"), val = tensor([0, 0])]; + tensor var_2212_dilations_0 = const()[name = string("op_2212_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537206976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539828480))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2197_cast_fp16 = transpose(perm = var_2196, x = attn_output_15_cast_fp16)[name = string("transpose_168")]; + tensor var_2212_cast_fp16 = 
conv(dilations = var_2212_dilations_0, groups = var_2212_groups_0, pad = var_2212_pad_0, pad_type = var_2212_pad_type_0, strides = var_2212_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2197_cast_fp16)[name = string("op_2212_cast_fp16")]; + tensor var_2216 = const()[name = string("op_2216"), val = tensor([0, 2, 1])]; + int32 var_2222 = const()[name = string("op_2222"), val = int32(-1)]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_51_cast_fp16 = transpose(perm = var_2216, x = var_2212_cast_fp16)[name = string("transpose_167")]; + tensor var_2224_cast_fp16 = mul(x = x_51_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2224_cast_fp16")]; + bool input_75_interleave_0 = const()[name = string("input_75_interleave_0"), val = bool(false)]; + tensor input_75_cast_fp16 = concat(axis = var_2222, interleave = input_75_interleave_0, values = (x_51_cast_fp16, var_2224_cast_fp16))[name = string("input_75_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_2219_to_fp16 = const()[name = string("op_2219_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2219_to_fp16, x = input_75_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_2229_split_sizes_0 = const()[name = string("op_2229_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2229_axis_0 = const()[name = string("op_2229_axis_0"), val = int32(-1)]; + tensor var_2229_cast_fp16_0, tensor var_2229_cast_fp16_1 = split(axis = var_2229_axis_0, split_sizes = var_2229_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2229_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(539831104)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_2229_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_39_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_2238 = const()[name = string("op_2238"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2240_cast_fp16 = mul(x = x_53_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2240_cast_fp16")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77_cast_fp16 = concat(axis = var_2238, interleave = input_77_interleave_0, values = (x_53_cast_fp16, var_2240_cast_fp16))[name = string("input_77_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2235_to_fp16, x = input_77_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_2245_split_sizes_0 = const()[name = string("op_2245_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2245_axis_0 = const()[name = string("op_2245_axis_0"), val = int32(-1)]; + tensor var_2245_cast_fp16_0, tensor var_2245_cast_fp16_1 = split(axis = var_2245_axis_0, split_sizes = var_2245_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2245_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539836288)))]; + tensor h_15_cast_fp16 = mul(x = var_2245_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = 
string("h_15_cast_fp16")]; + tensor var_2256 = const()[name = string("op_2256"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_2257 = transpose(perm = var_2256, x = h_15_cast_fp16)[name = string("transpose_166")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_2257)[name = string("input_79")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_79)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_81 = 
mul(x = gate_11, y = up_5)[name = string("input_81")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_81)[name = string("mlp_out_5")]; + tensor var_2297_axes_0 = const()[name = string("op_2297_axes_0"), val = tensor([2])]; + tensor var_2297 = squeeze(axes = var_2297_axes_0, x = mlp_out_5)[name = string("op_2297")]; + tensor var_2301 = const()[name = string("op_2301"), val = tensor([0, 2, 1])]; + int32 var_2307 = const()[name = string("op_2307"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor x_55 = transpose(perm = var_2301, x = var_2297)[name = string("transpose_165")]; + tensor var_2309 = mul(x = x_55, y = const_33_promoted)[name = string("op_2309")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83 = concat(axis = var_2307, interleave = input_83_interleave_0, values = (x_55, var_2309))[name = string("input_83")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_2304_to_fp16 = const()[name = string("op_2304_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2304_to_fp16, x = input_83)[name = string("normed_77_cast_fp16")]; + tensor 
var_2314_split_sizes_0 = const()[name = string("op_2314_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2314_axis_0 = const()[name = string("op_2314_axis_0"), val = int32(-1)]; + tensor var_2314_0, tensor var_2314_1 = split(axis = var_2314_axis_0, split_sizes = var_2314_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2314")]; + tensor hidden_states_23 = mul(x = var_2314_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 3584])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 3840])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_2342 = const()[name = string("op_2342"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_2343 = transpose(perm = var_2342, x = hidden_states_25_cast_fp16)[name = string("transpose_164")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_2343)[name = string("input_85")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 
gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_85)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_2362 = const()[name = string("op_2362"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_2363_cast_fp16 = transpose(perm = var_2362, x = per_layer_slice_5_cast_fp16)[name = string("transpose_163")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_2363_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_87_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539841472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540169216))))[name = 
string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_87_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_2379_axes_0 = const()[name = string("op_2379_axes_0"), val = tensor([2])]; + tensor var_2379_cast_fp16 = squeeze(axes = var_2379_axes_0, x = gated_17_cast_fp16)[name = string("op_2379_cast_fp16")]; + tensor var_2383 = const()[name = string("op_2383"), val = tensor([0, 2, 1])]; + int32 var_2389 = const()[name = string("op_2389"), val = int32(-1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_57_cast_fp16 = transpose(perm = var_2383, x = var_2379_cast_fp16)[name = string("transpose_162")]; + tensor var_2391_cast_fp16 = mul(x = x_57_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2391_cast_fp16")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89_cast_fp16 = concat(axis = var_2389, interleave = input_89_interleave_0, values = (x_57_cast_fp16, var_2391_cast_fp16))[name = string("input_89_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_2386_to_fp16 = const()[name = string("op_2386_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2386_to_fp16, x = input_89_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_2396_split_sizes_0 = const()[name = string("op_2396_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2396_axis_0 = const()[name = string("op_2396_axis_0"), val = int32(-1)]; + tensor var_2396_cast_fp16_0, tensor var_2396_cast_fp16_1 = split(axis = 
var_2396_axis_0, split_sizes = var_2396_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2396_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540171840)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_2396_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = tensor([0x1.58p-1])]; + tensor x_59_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_35_promoted_to_fp16)[name = string("x_59_cast_fp16")]; + tensor var_2408_axes_0 = const()[name = string("op_2408_axes_0"), val = tensor([0])]; + tensor var_2408_cast_fp16 = squeeze(axes = var_2408_axes_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2408_cast_fp16")]; + tensor var_2410_axes_0 = const()[name = string("op_2410_axes_0"), val = tensor([0])]; + tensor var_2410_cast_fp16 = squeeze(axes = var_2410_axes_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2410_cast_fp16")]; + tensor var_2413_begin_0 = const()[name = string("op_2413_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2413_end_0 = const()[name = string("op_2413_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2413_end_mask_0 = const()[name = string("op_2413_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2413_squeeze_mask_0 = const()[name = string("op_2413_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2413_cast_fp16 = slice_by_index(begin = var_2413_begin_0, end = var_2413_end_0, end_mask = var_2413_end_mask_0, squeeze_mask = var_2413_squeeze_mask_0, x = 
K_sliding_in)[name = string("op_2413_cast_fp16")]; + tensor K_sliding_slot_7_axes_0 = const()[name = string("K_sliding_slot_7_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_7_cast_fp16 = expand_dims(axes = K_sliding_slot_7_axes_0, x = var_2413_cast_fp16)[name = string("K_sliding_slot_7_cast_fp16")]; + tensor var_2418_begin_0 = const()[name = string("op_2418_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2418_end_0 = const()[name = string("op_2418_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2418_end_mask_0 = const()[name = string("op_2418_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2418_squeeze_mask_0 = const()[name = string("op_2418_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2418_cast_fp16 = slice_by_index(begin = var_2418_begin_0, end = var_2418_end_0, end_mask = var_2418_end_mask_0, squeeze_mask = var_2418_squeeze_mask_0, x = V_sliding_in)[name = string("op_2418_cast_fp16")]; + tensor V_sliding_slot_7_axes_0 = const()[name = string("V_sliding_slot_7_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_7_cast_fp16 = expand_dims(axes = V_sliding_slot_7_axes_0, x = var_2418_cast_fp16)[name = string("V_sliding_slot_7_cast_fp16")]; + int32 var_2425 = const()[name = string("op_2425"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2427_cast_fp16 = mul(x = x_59_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2427_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_2425, interleave = input_91_interleave_0, values = (x_59_cast_fp16, var_2427_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2422_to_fp16, x = input_91_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_2432_split_sizes_0 = const()[name = string("op_2432_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2432_axis_0 = const()[name = string("op_2432_axis_0"), val = int32(-1)]; + tensor var_2432_cast_fp16_0, tensor var_2432_cast_fp16_1 = split(axis = var_2432_axis_0, split_sizes = var_2432_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2432_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540177024)))]; + tensor h_19_cast_fp16 = mul(x = var_2432_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_2438 = const()[name = string("op_2438"), val = tensor([0, 2, 1])]; + tensor var_2441_axes_0 = const()[name = string("op_2441_axes_0"), val = tensor([2])]; + tensor var_2439_cast_fp16 = transpose(perm = var_2438, x = h_19_cast_fp16)[name = string("transpose_161")]; + tensor var_2441_cast_fp16 = expand_dims(axes = var_2441_axes_0, x = var_2439_cast_fp16)[name = string("op_2441_cast_fp16")]; + string var_2457_pad_type_0 = const()[name = string("op_2457_pad_type_0"), val = string("valid")]; + tensor var_2457_strides_0 = const()[name = string("op_2457_strides_0"), val = tensor([1, 1])]; + tensor var_2457_pad_0 = const()[name = string("op_2457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2457_dilations_0 = const()[name = string("op_2457_dilations_0"), val = tensor([1, 1])]; + int32 var_2457_groups_0 = const()[name = string("op_2457_groups_0"), val = int32(1)]; + tensor var_2457 = conv(dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = 
layers_3_self_attn_q_proj_weight_palettized, x = var_2441_cast_fp16)[name = string("op_2457")]; + tensor var_2462 = const()[name = string("op_2462"), val = tensor([1, 8, 256, 1])]; + tensor var_2463 = reshape(shape = var_2462, x = var_2457)[name = string("op_2463")]; + tensor var_2468 = const()[name = string("op_2468"), val = tensor([0, 1, 3, 2])]; + tensor var_2478 = const()[name = string("op_2478"), val = tensor([1, 8, 256])]; + tensor var_2469 = transpose(perm = var_2468, x = var_2463)[name = string("transpose_160")]; + tensor x_61 = reshape(shape = var_2478, x = var_2469)[name = string("x_61")]; + int32 var_2484 = const()[name = string("op_2484"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor var_2486 = mul(x = x_61, y = const_37_promoted)[name = string("op_2486")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95 = concat(axis = var_2484, interleave = input_95_interleave_0, values = (x_61, var_2486))[name = string("input_95")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_2481_to_fp16 = const()[name = string("op_2481_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2481_to_fp16, x = input_95)[name = string("normed_89_cast_fp16")]; + tensor var_2491_split_sizes_0 = const()[name = string("op_2491_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2491_axis_0 = const()[name = string("op_2491_axis_0"), val = int32(-1)]; + tensor var_2491_0, tensor var_2491_1 = split(axis = var_2491_axis_0, split_sizes = var_2491_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2491")]; + tensor var_2493 = mul(x = var_2491_0, y = layers_3_self_attn_q_norm_weight)[name = string("op_2493")]; + tensor var_2498 = const()[name = string("op_2498"), val = tensor([1, 8, 1, 256])]; + tensor q_27 = reshape(shape = 
var_2498, x = var_2493)[name = string("q_27")]; + tensor var_2500_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2500_cast_fp16")]; + tensor var_2501_split_sizes_0 = const()[name = string("op_2501_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2501_axis_0 = const()[name = string("op_2501_axis_0"), val = int32(-1)]; + tensor var_2501_0, tensor var_2501_1 = split(axis = var_2501_axis_0, split_sizes = var_2501_split_sizes_0, x = q_27)[name = string("op_2501")]; + fp16 const_38_promoted = const()[name = string("const_38_promoted"), val = fp16(-0x1p+0)]; + tensor var_2503 = mul(x = var_2501_1, y = const_38_promoted)[name = string("op_2503")]; + int32 var_2505 = const()[name = string("op_2505"), val = int32(-1)]; + bool var_2506_interleave_0 = const()[name = string("op_2506_interleave_0"), val = bool(false)]; + tensor var_2506 = concat(axis = var_2505, interleave = var_2506_interleave_0, values = (var_2503, var_2501_0))[name = string("op_2506")]; + tensor var_2507_cast_fp16 = mul(x = var_2506, y = sin_s)[name = string("op_2507_cast_fp16")]; + tensor q_31_cast_fp16 = add(x = var_2500_cast_fp16, y = var_2507_cast_fp16)[name = string("q_31_cast_fp16")]; + string var_2520_pad_type_0 = const()[name = string("op_2520_pad_type_0"), val = string("valid")]; + tensor var_2520_strides_0 = const()[name = string("op_2520_strides_0"), val = tensor([1, 1])]; + tensor var_2520_pad_0 = const()[name = string("op_2520_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2520_dilations_0 = const()[name = string("op_2520_dilations_0"), val = tensor([1, 1])]; + int32 var_2520_groups_0 = const()[name = string("op_2520_groups_0"), val = int32(1)]; + tensor var_2520 = conv(dilations = var_2520_dilations_0, groups = var_2520_groups_0, pad = var_2520_pad_0, pad_type = var_2520_pad_type_0, strides = var_2520_strides_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = var_2441_cast_fp16)[name = string("op_2520")]; + tensor var_2525 = const()[name = string("op_2525"), val = 
tensor([1, 2, 256, 1])]; + tensor var_2526 = reshape(shape = var_2525, x = var_2520)[name = string("op_2526")]; + tensor var_2531 = const()[name = string("op_2531"), val = tensor([0, 1, 3, 2])]; + string var_2548_pad_type_0 = const()[name = string("op_2548_pad_type_0"), val = string("valid")]; + tensor var_2548_strides_0 = const()[name = string("op_2548_strides_0"), val = tensor([1, 1])]; + tensor var_2548_pad_0 = const()[name = string("op_2548_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2548_dilations_0 = const()[name = string("op_2548_dilations_0"), val = tensor([1, 1])]; + int32 var_2548_groups_0 = const()[name = string("op_2548_groups_0"), val = int32(1)]; + tensor var_2548 = conv(dilations = var_2548_dilations_0, groups = var_2548_groups_0, pad = var_2548_pad_0, pad_type = var_2548_pad_type_0, strides = var_2548_strides_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = var_2441_cast_fp16)[name = string("op_2548")]; + tensor var_2553 = const()[name = string("op_2553"), val = tensor([1, 2, 256, 1])]; + tensor var_2554 = reshape(shape = var_2553, x = var_2548)[name = string("op_2554")]; + tensor var_2559 = const()[name = string("op_2559"), val = tensor([0, 1, 3, 2])]; + tensor var_2569 = const()[name = string("op_2569"), val = tensor([1, 2, 256])]; + tensor var_2532 = transpose(perm = var_2531, x = var_2526)[name = string("transpose_159")]; + tensor x_63 = reshape(shape = var_2569, x = var_2532)[name = string("x_63")]; + int32 var_2575 = const()[name = string("op_2575"), val = int32(-1)]; + fp16 const_39_promoted = const()[name = string("const_39_promoted"), val = fp16(-0x1p+0)]; + tensor var_2577 = mul(x = x_63, y = const_39_promoted)[name = string("op_2577")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97 = concat(axis = var_2575, interleave = input_97_interleave_0, values = (x_63, var_2577))[name = string("input_97")]; + tensor normed_93_axes_0 = const()[name = 
string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_2572_to_fp16 = const()[name = string("op_2572_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2572_to_fp16, x = input_97)[name = string("normed_93_cast_fp16")]; + tensor var_2582_split_sizes_0 = const()[name = string("op_2582_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2582_axis_0 = const()[name = string("op_2582_axis_0"), val = int32(-1)]; + tensor var_2582_0, tensor var_2582_1 = split(axis = var_2582_axis_0, split_sizes = var_2582_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2582")]; + tensor var_2584 = mul(x = var_2582_0, y = layers_3_self_attn_k_norm_weight)[name = string("op_2584")]; + tensor var_2589 = const()[name = string("op_2589"), val = tensor([1, 2, 1, 256])]; + tensor q_29 = reshape(shape = var_2589, x = var_2584)[name = string("q_29")]; + fp16 var_2591_promoted = const()[name = string("op_2591_promoted"), val = fp16(0x1p+1)]; + tensor var_2560 = transpose(perm = var_2559, x = var_2554)[name = string("transpose_158")]; + tensor var_2592 = pow(x = var_2560, y = var_2591_promoted)[name = string("op_2592")]; + tensor var_2597_axes_0 = const()[name = string("op_2597_axes_0"), val = tensor([-1])]; + bool var_2597_keep_dims_0 = const()[name = string("op_2597_keep_dims_0"), val = bool(true)]; + tensor var_2597 = reduce_mean(axes = var_2597_axes_0, keep_dims = var_2597_keep_dims_0, x = var_2592)[name = string("op_2597")]; + fp16 var_2599_to_fp16 = const()[name = string("op_2599_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_7_cast_fp16 = add(x = var_2597, y = var_2599_to_fp16)[name = string("mean_sq_7_cast_fp16")]; + fp32 var_2601_epsilon_0 = const()[name = string("op_2601_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2601_cast_fp16 = rsqrt(epsilon = var_2601_epsilon_0, x = mean_sq_7_cast_fp16)[name = string("op_2601_cast_fp16")]; + tensor input_101_cast_fp16 = mul(x = var_2560, y = 
var_2601_cast_fp16)[name = string("input_101_cast_fp16")]; + tensor var_2603_cast_fp16 = mul(x = q_29, y = cos_s)[name = string("op_2603_cast_fp16")]; + tensor var_2604_split_sizes_0 = const()[name = string("op_2604_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2604_axis_0 = const()[name = string("op_2604_axis_0"), val = int32(-1)]; + tensor var_2604_0, tensor var_2604_1 = split(axis = var_2604_axis_0, split_sizes = var_2604_split_sizes_0, x = q_29)[name = string("op_2604")]; + fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)]; + tensor var_2606 = mul(x = var_2604_1, y = const_40_promoted)[name = string("op_2606")]; + int32 var_2608 = const()[name = string("op_2608"), val = int32(-1)]; + bool var_2609_interleave_0 = const()[name = string("op_2609_interleave_0"), val = bool(false)]; + tensor var_2609 = concat(axis = var_2608, interleave = var_2609_interleave_0, values = (var_2606, var_2604_0))[name = string("op_2609")]; + tensor var_2610_cast_fp16 = mul(x = var_2609, y = sin_s)[name = string("op_2610_cast_fp16")]; + tensor input_99_cast_fp16 = add(x = var_2603_cast_fp16, y = var_2610_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; + fp16 const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_7_cast_fp16 = pad(constant_val = const_41_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = input_99_cast_fp16)[name = string("k_padded_7_cast_fp16")]; + tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; + fp16 const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = fp16(0x0p+0)]; + tensor 
v_padded_7_cast_fp16 = pad(constant_val = const_42_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = input_101_cast_fp16)[name = string("v_padded_7_cast_fp16")]; + tensor var_2639_begin_0 = const()[name = string("op_2639_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2639_end_0 = const()[name = string("op_2639_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2639_end_mask_0 = const()[name = string("op_2639_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2639_cast_fp16 = slice_by_index(begin = var_2639_begin_0, end = var_2639_end_0, end_mask = var_2639_end_mask_0, x = K_sliding_slot_7_cast_fp16)[name = string("op_2639_cast_fp16")]; + int32 var_2646 = const()[name = string("op_2646"), val = int32(2)]; + bool K_sliding_out_7_interleave_0 = const()[name = string("K_sliding_out_7_interleave_0"), val = bool(false)]; + tensor K_sliding_out_7_cast_fp16 = concat(axis = var_2646, interleave = K_sliding_out_7_interleave_0, values = (var_2639_cast_fp16, k_padded_7_cast_fp16))[name = string("K_sliding_out_7_cast_fp16")]; + tensor var_2662_begin_0 = const()[name = string("op_2662_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2662_end_0 = const()[name = string("op_2662_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2662_end_mask_0 = const()[name = string("op_2662_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2662_cast_fp16 = slice_by_index(begin = var_2662_begin_0, end = var_2662_end_0, end_mask = var_2662_end_mask_0, x = V_sliding_slot_7_cast_fp16)[name = string("op_2662_cast_fp16")]; + int32 var_2669 = const()[name = string("op_2669"), val = int32(2)]; + bool V_sliding_out_7_interleave_0 = const()[name = string("V_sliding_out_7_interleave_0"), val = bool(false)]; + tensor V_sliding_out_7_cast_fp16 = concat(axis = var_2669, interleave = V_sliding_out_7_interleave_0, values = (var_2662_cast_fp16, v_padded_7_cast_fp16))[name = string("V_sliding_out_7_cast_fp16")]; + tensor K_for_attn_7_begin_0 = 
const()[name = string("K_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_7_end_0 = const()[name = string("K_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_7_end_mask_0 = const()[name = string("K_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_7_cast_fp16 = slice_by_index(begin = K_for_attn_7_begin_0, end = K_for_attn_7_end_0, end_mask = K_for_attn_7_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("K_for_attn_7_cast_fp16")]; + tensor V_for_attn_7_begin_0 = const()[name = string("V_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_7_end_0 = const()[name = string("V_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_7_end_mask_0 = const()[name = string("V_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_7_cast_fp16 = slice_by_index(begin = V_for_attn_7_begin_0, end = V_for_attn_7_end_0, end_mask = V_for_attn_7_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("V_for_attn_7_cast_fp16")]; + tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_for_attn_7_cast_fp16)[name = string("transpose_157")]; + tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_12, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_13_cast_fp16 = transpose(perm = 
transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_156")]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_13, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; + tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_for_attn_7_cast_fp16)[name = string("transpose_155")]; + tensor tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_14, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_154")]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_15, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor V_expanded_7_perm_0 = const()[name = string("V_expanded_7_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor transpose_51_cast_fp16 = transpose(perm = transpose_51_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_153")]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, 
transpose_y = attn_weights_13_transpose_y_0, x = q_31_cast_fp16, y = transpose_51_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_3")]; + tensor var_2710 = sub(x = x_67_cast_fp16, y = reduce_max_3)[name = string("op_2710")]; + tensor var_2716 = exp(x = var_2710)[name = string("op_2716")]; + tensor var_2726_axes_0 = const()[name = string("op_2726_axes_0"), val = tensor([-1])]; + bool var_2726_keep_dims_0 = const()[name = string("op_2726_keep_dims_0"), val = bool(true)]; + tensor var_2726 = reduce_sum(axes = var_2726_axes_0, keep_dims = var_2726_keep_dims_0, x = var_2716)[name = string("op_2726")]; + tensor var_2732_cast_fp16 = real_div(x = var_2716, y = var_2726)[name = string("op_2732_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor V_expanded_7_cast_fp16 = transpose(perm = V_expanded_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_152")]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_2732_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_2743 = const()[name = string("op_2743"), val = tensor([0, 2, 1, 3])]; + tensor var_2750 = const()[name = string("op_2750"), val = tensor([1, 1, -1])]; + tensor var_2744_cast_fp16 = transpose(perm = var_2743, x = 
attn_output_19_cast_fp16)[name = string("transpose_151")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_2750, x = var_2744_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_2755 = const()[name = string("op_2755"), val = tensor([0, 2, 1])]; + string var_2771_pad_type_0 = const()[name = string("op_2771_pad_type_0"), val = string("valid")]; + int32 var_2771_groups_0 = const()[name = string("op_2771_groups_0"), val = int32(1)]; + tensor var_2771_strides_0 = const()[name = string("op_2771_strides_0"), val = tensor([1])]; + tensor var_2771_pad_0 = const()[name = string("op_2771_pad_0"), val = tensor([0, 0])]; + tensor var_2771_dilations_0 = const()[name = string("op_2771_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540182208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542803712))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2756_cast_fp16 = transpose(perm = var_2755, x = attn_output_21_cast_fp16)[name = string("transpose_150")]; + tensor var_2771_cast_fp16 = conv(dilations = var_2771_dilations_0, groups = var_2771_groups_0, pad = var_2771_pad_0, pad_type = var_2771_pad_type_0, strides = var_2771_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_2756_cast_fp16)[name = string("op_2771_cast_fp16")]; + tensor var_2775 = const()[name = string("op_2775"), val = tensor([0, 2, 1])]; + int32 var_2781 = const()[name = string("op_2781"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2775, x = var_2771_cast_fp16)[name = string("transpose_149")]; + tensor var_2783_cast_fp16 = mul(x = x_71_cast_fp16, y = const_43_promoted_to_fp16)[name = 
string("op_2783_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_2781, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2783_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_2778_to_fp16 = const()[name = string("op_2778_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2778_to_fp16, x = input_105_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_2788_split_sizes_0 = const()[name = string("op_2788_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2788_axis_0 = const()[name = string("op_2788_axis_0"), val = int32(-1)]; + tensor var_2788_cast_fp16_0, tensor var_2788_cast_fp16_1 = split(axis = var_2788_axis_0, split_sizes = var_2788_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2788_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542806336)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_2788_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_59_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2797 = const()[name = string("op_2797"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2799_cast_fp16 = mul(x = x_73_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2799_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor 
input_107_cast_fp16 = concat(axis = var_2797, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2799_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2794_to_fp16 = const()[name = string("op_2794_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2794_to_fp16, x = input_107_cast_fp16)[name = string("normed_101_cast_fp16")]; + tensor var_2804_split_sizes_0 = const()[name = string("op_2804_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2804_axis_0 = const()[name = string("op_2804_axis_0"), val = int32(-1)]; + tensor var_2804_cast_fp16_0, tensor var_2804_cast_fp16_1 = split(axis = var_2804_axis_0, split_sizes = var_2804_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2804_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542811520)))]; + tensor h_21_cast_fp16 = mul(x = var_2804_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_2815 = const()[name = string("op_2815"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_2816 = transpose(perm = var_2815, x = h_21_cast_fp16)[name = string("transpose_148")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_2816)[name = string("input_109")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_111 = mul(x = gate_15, y = up_7)[name = string("input_111")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, 
pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_7")]; + tensor var_2856_axes_0 = const()[name = string("op_2856_axes_0"), val = tensor([2])]; + tensor var_2856 = squeeze(axes = var_2856_axes_0, x = mlp_out_7)[name = string("op_2856")]; + tensor var_2860 = const()[name = string("op_2860"), val = tensor([0, 2, 1])]; + int32 var_2866 = const()[name = string("op_2866"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2860, x = var_2856)[name = string("transpose_147")]; + tensor var_2868 = mul(x = x_75, y = const_45_promoted)[name = string("op_2868")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_2866, interleave = input_113_interleave_0, values = (x_75, var_2868))[name = string("input_113")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2863_to_fp16 = const()[name = string("op_2863_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2863_to_fp16, x = input_113)[name = string("normed_105_cast_fp16")]; + tensor var_2873_split_sizes_0 = const()[name = string("op_2873_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2873_axis_0 = const()[name = string("op_2873_axis_0"), val = int32(-1)]; + tensor var_2873_0, tensor var_2873_1 = split(axis = var_2873_axis_0, split_sizes = var_2873_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2873")]; + tensor hidden_states_33 = mul(x = var_2873_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = 
const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 3840])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 4096])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_2901 = const()[name = string("op_2901"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_2902 = transpose(perm = var_2901, x = hidden_states_35_cast_fp16)[name = string("transpose_146")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_2902)[name = string("input_115")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_2921 = const()[name = string("op_2921"), val = tensor([0, 2, 1])]; + tensor 
per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_2922_cast_fp16 = transpose(perm = var_2921, x = per_layer_slice_7_cast_fp16)[name = string("transpose_145")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_2922_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542816704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543144448))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_2938_axes_0 = const()[name = string("op_2938_axes_0"), val = tensor([2])]; + tensor var_2938_cast_fp16 = squeeze(axes = var_2938_axes_0, x = gated_23_cast_fp16)[name = string("op_2938_cast_fp16")]; + tensor var_2942 = const()[name = string("op_2942"), val = 
tensor([0, 2, 1])]; + int32 var_2948 = const()[name = string("op_2948"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_2942, x = var_2938_cast_fp16)[name = string("transpose_144")]; + tensor var_2950_cast_fp16 = mul(x = x_77_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2950_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_2948, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2950_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2945_to_fp16 = const()[name = string("op_2945_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2945_to_fp16, x = input_119_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_2955_split_sizes_0 = const()[name = string("op_2955_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2955_axis_0 = const()[name = string("op_2955_axis_0"), val = int32(-1)]; + tensor var_2955_cast_fp16_0, tensor var_2955_cast_fp16_1 = split(axis = var_2955_axis_0, split_sizes = var_2955_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2955_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543147072)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_2955_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = 
string("hidden_states_41_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.14p-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + tensor var_2967_axes_0 = const()[name = string("op_2967_axes_0"), val = tensor([0])]; + tensor var_2967_cast_fp16 = squeeze(axes = var_2967_axes_0, x = K_sliding_out_7_cast_fp16)[name = string("op_2967_cast_fp16")]; + tensor var_2969_axes_0 = const()[name = string("op_2969_axes_0"), val = tensor([0])]; + tensor var_2969_cast_fp16 = squeeze(axes = var_2969_axes_0, x = V_sliding_out_7_cast_fp16)[name = string("op_2969_cast_fp16")]; + tensor var_2972_begin_0 = const()[name = string("op_2972_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_2972_end_0 = const()[name = string("op_2972_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_2972_end_mask_0 = const()[name = string("op_2972_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2972_squeeze_mask_0 = const()[name = string("op_2972_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2972_cast_fp16 = slice_by_index(begin = var_2972_begin_0, end = var_2972_end_0, end_mask = var_2972_end_mask_0, squeeze_mask = var_2972_squeeze_mask_0, x = K_sliding_in)[name = string("op_2972_cast_fp16")]; + tensor K_sliding_slot_9_axes_0 = const()[name = string("K_sliding_slot_9_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_9_cast_fp16 = expand_dims(axes = K_sliding_slot_9_axes_0, x = var_2972_cast_fp16)[name = string("K_sliding_slot_9_cast_fp16")]; + tensor var_2977_begin_0 = const()[name = string("op_2977_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_2977_end_0 = const()[name = string("op_2977_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_2977_end_mask_0 = const()[name = string("op_2977_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2977_squeeze_mask_0 = 
const()[name = string("op_2977_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2977_cast_fp16 = slice_by_index(begin = var_2977_begin_0, end = var_2977_end_0, end_mask = var_2977_end_mask_0, squeeze_mask = var_2977_squeeze_mask_0, x = V_sliding_in)[name = string("op_2977_cast_fp16")]; + tensor V_sliding_slot_9_axes_0 = const()[name = string("V_sliding_slot_9_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_9_cast_fp16 = expand_dims(axes = V_sliding_slot_9_axes_0, x = var_2977_cast_fp16)[name = string("V_sliding_slot_9_cast_fp16")]; + int32 var_2984 = const()[name = string("op_2984"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2986_cast_fp16 = mul(x = x_79_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2986_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_2984, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2986_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_2981_to_fp16 = const()[name = string("op_2981_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2981_to_fp16, x = input_121_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor var_2991_split_sizes_0 = const()[name = string("op_2991_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2991_axis_0 = const()[name = string("op_2991_axis_0"), val = int32(-1)]; + tensor var_2991_cast_fp16_0, tensor var_2991_cast_fp16_1 = split(axis = var_2991_axis_0, split_sizes = var_2991_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2991_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543152256)))]; + tensor h_25_cast_fp16 = mul(x = var_2991_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_2997 = const()[name = string("op_2997"), val = tensor([0, 2, 1])]; + tensor var_3000_axes_0 = const()[name = string("op_3000_axes_0"), val = tensor([2])]; + tensor var_2998_cast_fp16 = transpose(perm = var_2997, x = h_25_cast_fp16)[name = string("transpose_143")]; + tensor var_3000_cast_fp16 = expand_dims(axes = var_3000_axes_0, x = var_2998_cast_fp16)[name = string("op_3000_cast_fp16")]; + string var_3016_pad_type_0 = const()[name = string("op_3016_pad_type_0"), val = string("valid")]; + tensor var_3016_strides_0 = const()[name = string("op_3016_strides_0"), val = tensor([1, 1])]; + tensor var_3016_pad_0 = const()[name = string("op_3016_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3016_dilations_0 = const()[name = string("op_3016_dilations_0"), val = tensor([1, 1])]; + int32 var_3016_groups_0 = const()[name = string("op_3016_groups_0"), val = int32(1)]; + tensor var_3016 = conv(dilations = var_3016_dilations_0, groups = var_3016_groups_0, pad = var_3016_pad_0, pad_type = var_3016_pad_type_0, strides = var_3016_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_3000_cast_fp16)[name = string("op_3016")]; + tensor var_3021 = const()[name = string("op_3021"), val = tensor([1, 8, 256, 1])]; + tensor var_3022 = reshape(shape = var_3021, x = var_3016)[name = string("op_3022")]; + tensor var_3027 = const()[name = string("op_3027"), val = tensor([0, 1, 3, 2])]; + tensor var_3037 = const()[name = string("op_3037"), val = tensor([1, 8, 256])]; + tensor var_3028 = transpose(perm = var_3027, x = var_3022)[name = string("transpose_142")]; + tensor x_81 = reshape(shape = var_3037, x = var_3028)[name = string("x_81")]; + int32 var_3043 = 
const()[name = string("op_3043"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_3045 = mul(x = x_81, y = const_49_promoted)[name = string("op_3045")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_3043, interleave = input_125_interleave_0, values = (x_81, var_3045))[name = string("input_125")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_3040_to_fp16 = const()[name = string("op_3040_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_3040_to_fp16, x = input_125)[name = string("normed_117_cast_fp16")]; + tensor var_3050_split_sizes_0 = const()[name = string("op_3050_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3050_axis_0 = const()[name = string("op_3050_axis_0"), val = int32(-1)]; + tensor var_3050_0, tensor var_3050_1 = split(axis = var_3050_axis_0, split_sizes = var_3050_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_3050")]; + tensor var_3052 = mul(x = var_3050_0, y = layers_4_self_attn_q_norm_weight)[name = string("op_3052")]; + tensor var_3057 = const()[name = string("op_3057"), val = tensor([1, 8, 1, 256])]; + tensor q_35 = reshape(shape = var_3057, x = var_3052)[name = string("q_35")]; + tensor var_3059_cast_fp16 = mul(x = q_35, y = cos_s)[name = string("op_3059_cast_fp16")]; + tensor var_3060_split_sizes_0 = const()[name = string("op_3060_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3060_axis_0 = const()[name = string("op_3060_axis_0"), val = int32(-1)]; + tensor var_3060_0, tensor var_3060_1 = split(axis = var_3060_axis_0, split_sizes = var_3060_split_sizes_0, x = q_35)[name = string("op_3060")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_3062 = mul(x = var_3060_1, y 
= const_50_promoted)[name = string("op_3062")]; + int32 var_3064 = const()[name = string("op_3064"), val = int32(-1)]; + bool var_3065_interleave_0 = const()[name = string("op_3065_interleave_0"), val = bool(false)]; + tensor var_3065 = concat(axis = var_3064, interleave = var_3065_interleave_0, values = (var_3062, var_3060_0))[name = string("op_3065")]; + tensor var_3066_cast_fp16 = mul(x = var_3065, y = sin_s)[name = string("op_3066_cast_fp16")]; + tensor q_39_cast_fp16 = add(x = var_3059_cast_fp16, y = var_3066_cast_fp16)[name = string("q_39_cast_fp16")]; + string var_3079_pad_type_0 = const()[name = string("op_3079_pad_type_0"), val = string("valid")]; + tensor var_3079_strides_0 = const()[name = string("op_3079_strides_0"), val = tensor([1, 1])]; + tensor var_3079_pad_0 = const()[name = string("op_3079_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3079_dilations_0 = const()[name = string("op_3079_dilations_0"), val = tensor([1, 1])]; + int32 var_3079_groups_0 = const()[name = string("op_3079_groups_0"), val = int32(1)]; + tensor var_3079 = conv(dilations = var_3079_dilations_0, groups = var_3079_groups_0, pad = var_3079_pad_0, pad_type = var_3079_pad_type_0, strides = var_3079_strides_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = var_3000_cast_fp16)[name = string("op_3079")]; + tensor var_3084 = const()[name = string("op_3084"), val = tensor([1, 2, 256, 1])]; + tensor var_3085 = reshape(shape = var_3084, x = var_3079)[name = string("op_3085")]; + tensor var_3090 = const()[name = string("op_3090"), val = tensor([0, 1, 3, 2])]; + string var_3107_pad_type_0 = const()[name = string("op_3107_pad_type_0"), val = string("valid")]; + tensor var_3107_strides_0 = const()[name = string("op_3107_strides_0"), val = tensor([1, 1])]; + tensor var_3107_pad_0 = const()[name = string("op_3107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3107_dilations_0 = const()[name = string("op_3107_dilations_0"), val = tensor([1, 1])]; + int32 var_3107_groups_0 = 
const()[name = string("op_3107_groups_0"), val = int32(1)]; + tensor var_3107 = conv(dilations = var_3107_dilations_0, groups = var_3107_groups_0, pad = var_3107_pad_0, pad_type = var_3107_pad_type_0, strides = var_3107_strides_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = var_3000_cast_fp16)[name = string("op_3107")]; + tensor var_3112 = const()[name = string("op_3112"), val = tensor([1, 2, 256, 1])]; + tensor var_3113 = reshape(shape = var_3112, x = var_3107)[name = string("op_3113")]; + tensor var_3118 = const()[name = string("op_3118"), val = tensor([0, 1, 3, 2])]; + tensor var_3128 = const()[name = string("op_3128"), val = tensor([1, 2, 256])]; + tensor var_3091 = transpose(perm = var_3090, x = var_3085)[name = string("transpose_141")]; + tensor x_83 = reshape(shape = var_3128, x = var_3091)[name = string("x_83")]; + int32 var_3134 = const()[name = string("op_3134"), val = int32(-1)]; + fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; + tensor var_3136 = mul(x = x_83, y = const_51_promoted)[name = string("op_3136")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_3134, interleave = input_127_interleave_0, values = (x_83, var_3136))[name = string("input_127")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_3131_to_fp16 = const()[name = string("op_3131_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_3131_to_fp16, x = input_127)[name = string("normed_121_cast_fp16")]; + tensor var_3141_split_sizes_0 = const()[name = string("op_3141_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3141_axis_0 = const()[name = string("op_3141_axis_0"), val = int32(-1)]; + tensor var_3141_0, tensor var_3141_1 = split(axis = var_3141_axis_0, split_sizes = var_3141_split_sizes_0, x = 
normed_121_cast_fp16)[name = string("op_3141")]; + tensor var_3143 = mul(x = var_3141_0, y = layers_4_self_attn_k_norm_weight)[name = string("op_3143")]; + tensor var_3148 = const()[name = string("op_3148"), val = tensor([1, 2, 1, 256])]; + tensor q_37 = reshape(shape = var_3148, x = var_3143)[name = string("q_37")]; + fp16 var_3150_promoted = const()[name = string("op_3150_promoted"), val = fp16(0x1p+1)]; + tensor var_3119 = transpose(perm = var_3118, x = var_3113)[name = string("transpose_140")]; + tensor var_3151 = pow(x = var_3119, y = var_3150_promoted)[name = string("op_3151")]; + tensor var_3156_axes_0 = const()[name = string("op_3156_axes_0"), val = tensor([-1])]; + bool var_3156_keep_dims_0 = const()[name = string("op_3156_keep_dims_0"), val = bool(true)]; + tensor var_3156 = reduce_mean(axes = var_3156_axes_0, keep_dims = var_3156_keep_dims_0, x = var_3151)[name = string("op_3156")]; + fp16 var_3158_to_fp16 = const()[name = string("op_3158_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_9_cast_fp16 = add(x = var_3156, y = var_3158_to_fp16)[name = string("mean_sq_9_cast_fp16")]; + fp32 var_3160_epsilon_0 = const()[name = string("op_3160_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3160_cast_fp16 = rsqrt(epsilon = var_3160_epsilon_0, x = mean_sq_9_cast_fp16)[name = string("op_3160_cast_fp16")]; + tensor input_131_cast_fp16 = mul(x = var_3119, y = var_3160_cast_fp16)[name = string("input_131_cast_fp16")]; + tensor var_3162_cast_fp16 = mul(x = q_37, y = cos_s)[name = string("op_3162_cast_fp16")]; + tensor var_3163_split_sizes_0 = const()[name = string("op_3163_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3163_axis_0 = const()[name = string("op_3163_axis_0"), val = int32(-1)]; + tensor var_3163_0, tensor var_3163_1 = split(axis = var_3163_axis_0, split_sizes = var_3163_split_sizes_0, x = q_37)[name = string("op_3163")]; + fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; + tensor var_3165 = 
mul(x = var_3163_1, y = const_52_promoted)[name = string("op_3165")]; + int32 var_3167 = const()[name = string("op_3167"), val = int32(-1)]; + bool var_3168_interleave_0 = const()[name = string("op_3168_interleave_0"), val = bool(false)]; + tensor var_3168 = concat(axis = var_3167, interleave = var_3168_interleave_0, values = (var_3165, var_3163_0))[name = string("op_3168")]; + tensor var_3169_cast_fp16 = mul(x = var_3168, y = sin_s)[name = string("op_3169_cast_fp16")]; + tensor input_129_cast_fp16 = add(x = var_3162_cast_fp16, y = var_3169_cast_fp16)[name = string("input_129_cast_fp16")]; + tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; + fp16 const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_9_cast_fp16 = pad(constant_val = const_53_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_129_cast_fp16)[name = string("k_padded_9_cast_fp16")]; + tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; + fp16 const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_9_cast_fp16 = pad(constant_val = const_54_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = input_131_cast_fp16)[name = string("v_padded_9_cast_fp16")]; + tensor var_3198_begin_0 = const()[name = string("op_3198_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3198_end_0 = const()[name = string("op_3198_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3198_end_mask_0 = const()[name = string("op_3198_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3198_cast_fp16 = slice_by_index(begin = var_3198_begin_0, end = var_3198_end_0, end_mask = 
var_3198_end_mask_0, x = K_sliding_slot_9_cast_fp16)[name = string("op_3198_cast_fp16")]; + int32 var_3205 = const()[name = string("op_3205"), val = int32(2)]; + bool K_sliding_out_9_interleave_0 = const()[name = string("K_sliding_out_9_interleave_0"), val = bool(false)]; + tensor K_sliding_out_9_cast_fp16 = concat(axis = var_3205, interleave = K_sliding_out_9_interleave_0, values = (var_3198_cast_fp16, k_padded_9_cast_fp16))[name = string("K_sliding_out_9_cast_fp16")]; + tensor var_3221_begin_0 = const()[name = string("op_3221_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3221_end_0 = const()[name = string("op_3221_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3221_end_mask_0 = const()[name = string("op_3221_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3221_cast_fp16 = slice_by_index(begin = var_3221_begin_0, end = var_3221_end_0, end_mask = var_3221_end_mask_0, x = V_sliding_slot_9_cast_fp16)[name = string("op_3221_cast_fp16")]; + int32 var_3228 = const()[name = string("op_3228"), val = int32(2)]; + bool V_sliding_out_9_interleave_0 = const()[name = string("V_sliding_out_9_interleave_0"), val = bool(false)]; + tensor V_sliding_out_9_cast_fp16 = concat(axis = var_3228, interleave = V_sliding_out_9_interleave_0, values = (var_3221_cast_fp16, v_padded_9_cast_fp16))[name = string("V_sliding_out_9_cast_fp16")]; + tensor K_for_attn_9_begin_0 = const()[name = string("K_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_9_end_0 = const()[name = string("K_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_9_end_mask_0 = const()[name = string("K_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_9_cast_fp16 = slice_by_index(begin = K_for_attn_9_begin_0, end = K_for_attn_9_end_0, end_mask = K_for_attn_9_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("K_for_attn_9_cast_fp16")]; + tensor V_for_attn_9_begin_0 = const()[name = 
string("V_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_9_end_0 = const()[name = string("V_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_9_end_mask_0 = const()[name = string("V_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_9_cast_fp16 = slice_by_index(begin = V_for_attn_9_begin_0, end = V_for_attn_9_end_0, end_mask = V_for_attn_9_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("V_for_attn_9_cast_fp16")]; + tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_for_attn_9_cast_fp16)[name = string("transpose_139")]; + tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; + tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_138")]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_18_cast_fp16 = transpose(perm = 
transpose_18_perm_0, x = V_for_attn_9_cast_fp16)[name = string("transpose_137")]; + tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_136")]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; + tensor V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor transpose_52_cast_fp16 = transpose(perm = transpose_52_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_135")]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_39_cast_fp16, y = transpose_52_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_87_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = 
x_87_cast_fp16)[name = string("reduce_max_4")]; + tensor var_3269 = sub(x = x_87_cast_fp16, y = reduce_max_4)[name = string("op_3269")]; + tensor var_3275 = exp(x = var_3269)[name = string("op_3275")]; + tensor var_3285_axes_0 = const()[name = string("op_3285_axes_0"), val = tensor([-1])]; + bool var_3285_keep_dims_0 = const()[name = string("op_3285_keep_dims_0"), val = bool(true)]; + tensor var_3285 = reduce_sum(axes = var_3285_axes_0, keep_dims = var_3285_keep_dims_0, x = var_3275)[name = string("op_3285")]; + tensor var_3291_cast_fp16 = real_div(x = var_3275, y = var_3285)[name = string("op_3291_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_134")]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_3291_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3302 = const()[name = string("op_3302"), val = tensor([0, 2, 1, 3])]; + tensor var_3309 = const()[name = string("op_3309"), val = tensor([1, 1, -1])]; + tensor var_3303_cast_fp16 = transpose(perm = var_3302, x = attn_output_25_cast_fp16)[name = string("transpose_133")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_3309, x = var_3303_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_3314 = const()[name = string("op_3314"), val = tensor([0, 2, 1])]; + string var_3330_pad_type_0 = const()[name = string("op_3330_pad_type_0"), val = string("valid")]; + int32 var_3330_groups_0 = const()[name = string("op_3330_groups_0"), val = int32(1)]; + tensor var_3330_strides_0 = const()[name = string("op_3330_strides_0"), val = tensor([1])]; + tensor 
var_3330_pad_0 = const()[name = string("op_3330_pad_0"), val = tensor([0, 0])]; + tensor var_3330_dilations_0 = const()[name = string("op_3330_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543157440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545778944))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3315_cast_fp16 = transpose(perm = var_3314, x = attn_output_27_cast_fp16)[name = string("transpose_132")]; + tensor var_3330_cast_fp16 = conv(dilations = var_3330_dilations_0, groups = var_3330_groups_0, pad = var_3330_pad_0, pad_type = var_3330_pad_type_0, strides = var_3330_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3315_cast_fp16)[name = string("op_3330_cast_fp16")]; + tensor var_3334 = const()[name = string("op_3334"), val = tensor([0, 2, 1])]; + int32 var_3340 = const()[name = string("op_3340"), val = int32(-1)]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_91_cast_fp16 = transpose(perm = var_3334, x = var_3330_cast_fp16)[name = string("transpose_131")]; + tensor var_3342_cast_fp16 = mul(x = x_91_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_3342_cast_fp16")]; + bool input_135_interleave_0 = const()[name = string("input_135_interleave_0"), val = bool(false)]; + tensor input_135_cast_fp16 = concat(axis = var_3340, interleave = input_135_interleave_0, values = (x_91_cast_fp16, var_3342_cast_fp16))[name = string("input_135_cast_fp16")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_3337_to_fp16 = const()[name = string("op_3337_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = 
var_3337_to_fp16, x = input_135_cast_fp16)[name = string("normed_125_cast_fp16")]; + tensor var_3347_split_sizes_0 = const()[name = string("op_3347_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3347_axis_0 = const()[name = string("op_3347_axis_0"), val = int32(-1)]; + tensor var_3347_cast_fp16_0, tensor var_3347_cast_fp16_1 = split(axis = var_3347_axis_0, split_sizes = var_3347_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_3347_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545781568)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_3347_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_93_cast_fp16")]; + int32 var_3356 = const()[name = string("op_3356"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3358_cast_fp16 = mul(x = x_93_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3358_cast_fp16")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137_cast_fp16 = concat(axis = var_3356, interleave = input_137_interleave_0, values = (x_93_cast_fp16, var_3358_cast_fp16))[name = string("input_137_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_3353_to_fp16 = const()[name = string("op_3353_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3353_to_fp16, x = input_137_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_3363_split_sizes_0 = const()[name = 
string("op_3363_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3363_axis_0 = const()[name = string("op_3363_axis_0"), val = int32(-1)]; + tensor var_3363_cast_fp16_0, tensor var_3363_cast_fp16_1 = split(axis = var_3363_axis_0, split_sizes = var_3363_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3363_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545786752)))]; + tensor h_27_cast_fp16 = mul(x = var_3363_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_3374 = const()[name = string("op_3374"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_3375 = transpose(perm = var_3374, x = h_27_cast_fp16)[name = string("transpose_130")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_3375)[name = string("input_139")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_139)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = 
string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_139)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_141 = mul(x = gate_19, y = up_9)[name = string("input_141")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_141)[name = string("mlp_out_9")]; + tensor var_3415_axes_0 = const()[name = string("op_3415_axes_0"), val = tensor([2])]; + tensor var_3415 = squeeze(axes = var_3415_axes_0, x = mlp_out_9)[name = string("op_3415")]; + tensor var_3419 = const()[name = string("op_3419"), val = tensor([0, 2, 1])]; + int32 var_3425 = const()[name = string("op_3425"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = 
fp16(-0x1p+0)]; + tensor x_95 = transpose(perm = var_3419, x = var_3415)[name = string("transpose_129")]; + tensor var_3427 = mul(x = x_95, y = const_57_promoted)[name = string("op_3427")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143 = concat(axis = var_3425, interleave = input_143_interleave_0, values = (x_95, var_3427))[name = string("input_143")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_3422_to_fp16 = const()[name = string("op_3422_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3422_to_fp16, x = input_143)[name = string("normed_133_cast_fp16")]; + tensor var_3432_split_sizes_0 = const()[name = string("op_3432_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3432_axis_0 = const()[name = string("op_3432_axis_0"), val = int32(-1)]; + tensor var_3432_0, tensor var_3432_1 = split(axis = var_3432_axis_0, split_sizes = var_3432_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3432")]; + tensor hidden_states_43 = mul(x = var_3432_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_93_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 4096])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 1, 4352])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_3460 
= const()[name = string("op_3460"), val = tensor([0, 2, 1])]; + tensor input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor([2])]; + tensor var_3461 = transpose(perm = var_3460, x = hidden_states_45_cast_fp16)[name = string("transpose_128")]; + tensor input_145 = expand_dims(axes = input_145_axes_0, x = var_3461)[name = string("input_145")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_145)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_3480 = const()[name = string("op_3480"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_3481_cast_fp16 = transpose(perm = var_3480, x = per_layer_slice_9_cast_fp16)[name = string("transpose_127")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_3481_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_147_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_147_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = 
string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545791936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546119680))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_147_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_3497_axes_0 = const()[name = string("op_3497_axes_0"), val = tensor([2])]; + tensor var_3497_cast_fp16 = squeeze(axes = var_3497_axes_0, x = gated_29_cast_fp16)[name = string("op_3497_cast_fp16")]; + tensor var_3501 = const()[name = string("op_3501"), val = tensor([0, 2, 1])]; + int32 var_3507 = const()[name = string("op_3507"), val = int32(-1)]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_97_cast_fp16 = transpose(perm = var_3501, x = var_3497_cast_fp16)[name = string("transpose_126")]; + tensor var_3509_cast_fp16 = mul(x = x_97_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_3509_cast_fp16")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149_cast_fp16 = concat(axis = 
var_3507, interleave = input_149_interleave_0, values = (x_97_cast_fp16, var_3509_cast_fp16))[name = string("input_149_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_3504_to_fp16 = const()[name = string("op_3504_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3504_to_fp16, x = input_149_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor var_3514_split_sizes_0 = const()[name = string("op_3514_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3514_axis_0 = const()[name = string("op_3514_axis_0"), val = int32(-1)]; + tensor var_3514_cast_fp16_0, tensor var_3514_cast_fp16_1 = split(axis = var_3514_axis_0, split_sizes = var_3514_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3514_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546122304)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_3514_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = tensor([0x1.46p-1])]; + tensor x_99_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_59_promoted_to_fp16)[name = string("x_99_cast_fp16")]; + tensor var_3526_axes_0 = const()[name = string("op_3526_axes_0"), val = tensor([0])]; + tensor var_3526_cast_fp16 = squeeze(axes = var_3526_axes_0, x = K_sliding_out_9_cast_fp16)[name = string("op_3526_cast_fp16")]; + tensor var_3528_axes_0 = const()[name = string("op_3528_axes_0"), val = 
tensor([0])]; + tensor var_3528_cast_fp16 = squeeze(axes = var_3528_axes_0, x = V_sliding_out_9_cast_fp16)[name = string("op_3528_cast_fp16")]; + tensor var_3531_begin_0 = const()[name = string("op_3531_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3531_end_0 = const()[name = string("op_3531_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_3531_end_mask_0 = const()[name = string("op_3531_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3531_squeeze_mask_0 = const()[name = string("op_3531_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3531_cast_fp16 = slice_by_index(begin = var_3531_begin_0, end = var_3531_end_0, end_mask = var_3531_end_mask_0, squeeze_mask = var_3531_squeeze_mask_0, x = K_full_in)[name = string("op_3531_cast_fp16")]; + tensor K_full_slot_1_axes_0 = const()[name = string("K_full_slot_1_axes_0"), val = tensor([0])]; + tensor K_full_slot_1_cast_fp16 = expand_dims(axes = K_full_slot_1_axes_0, x = var_3531_cast_fp16)[name = string("K_full_slot_1_cast_fp16")]; + tensor var_3536_begin_0 = const()[name = string("op_3536_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3536_end_0 = const()[name = string("op_3536_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_3536_end_mask_0 = const()[name = string("op_3536_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3536_squeeze_mask_0 = const()[name = string("op_3536_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3536_cast_fp16 = slice_by_index(begin = var_3536_begin_0, end = var_3536_end_0, end_mask = var_3536_end_mask_0, squeeze_mask = var_3536_squeeze_mask_0, x = V_full_in)[name = string("op_3536_cast_fp16")]; + tensor V_full_slot_1_axes_0 = const()[name = string("V_full_slot_1_axes_0"), val = tensor([0])]; + tensor V_full_slot_1_cast_fp16 = expand_dims(axes = V_full_slot_1_axes_0, x = var_3536_cast_fp16)[name = string("V_full_slot_1_cast_fp16")]; + int32 var_3543 = const()[name = 
string("op_3543"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3545_cast_fp16 = mul(x = x_99_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3545_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_3543, interleave = input_151_interleave_0, values = (x_99_cast_fp16, var_3545_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3540_to_fp16, x = input_151_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_3550_split_sizes_0 = const()[name = string("op_3550_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3550_axis_0 = const()[name = string("op_3550_axis_0"), val = int32(-1)]; + tensor var_3550_cast_fp16_0, tensor var_3550_cast_fp16_1 = split(axis = var_3550_axis_0, split_sizes = var_3550_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3550_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546127488)))]; + tensor h_31_cast_fp16 = mul(x = var_3550_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_3556 = const()[name = string("op_3556"), val = tensor([0, 2, 1])]; + tensor var_3559_axes_0 = const()[name = string("op_3559_axes_0"), val = tensor([2])]; + tensor var_3557_cast_fp16 = transpose(perm = var_3556, x = h_31_cast_fp16)[name = string("transpose_125")]; + tensor var_3559_cast_fp16 = expand_dims(axes = 
var_3559_axes_0, x = var_3557_cast_fp16)[name = string("op_3559_cast_fp16")]; + string var_3575_pad_type_0 = const()[name = string("op_3575_pad_type_0"), val = string("valid")]; + tensor var_3575_strides_0 = const()[name = string("op_3575_strides_0"), val = tensor([1, 1])]; + tensor var_3575_pad_0 = const()[name = string("op_3575_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3575_dilations_0 = const()[name = string("op_3575_dilations_0"), val = tensor([1, 1])]; + int32 var_3575_groups_0 = const()[name = string("op_3575_groups_0"), val = int32(1)]; + tensor var_3575 = conv(dilations = var_3575_dilations_0, groups = var_3575_groups_0, pad = var_3575_pad_0, pad_type = var_3575_pad_type_0, strides = var_3575_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_3559_cast_fp16)[name = string("op_3575")]; + tensor var_3580 = const()[name = string("op_3580"), val = tensor([1, 8, 512, 1])]; + tensor var_3581 = reshape(shape = var_3580, x = var_3575)[name = string("op_3581")]; + tensor var_3586 = const()[name = string("op_3586"), val = tensor([0, 1, 3, 2])]; + tensor var_3596 = const()[name = string("op_3596"), val = tensor([1, 8, 512])]; + tensor var_3587 = transpose(perm = var_3586, x = var_3581)[name = string("transpose_124")]; + tensor x_101 = reshape(shape = var_3596, x = var_3587)[name = string("x_101")]; + int32 var_3602 = const()[name = string("op_3602"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor var_3604 = mul(x = x_101, y = const_61_promoted)[name = string("op_3604")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155 = concat(axis = var_3602, interleave = input_155_interleave_0, values = (x_101, var_3604))[name = string("input_155")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_3599_to_fp16 = const()[name = string("op_3599_to_fp16"), 
val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_3599_to_fp16, x = input_155)[name = string("normed_145_cast_fp16")]; + tensor var_3609_split_sizes_0 = const()[name = string("op_3609_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3609_axis_0 = const()[name = string("op_3609_axis_0"), val = int32(-1)]; + tensor var_3609_0, tensor var_3609_1 = split(axis = var_3609_axis_0, split_sizes = var_3609_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_3609")]; + tensor var_3611 = mul(x = var_3609_0, y = layers_5_self_attn_q_norm_weight)[name = string("op_3611")]; + tensor var_3616 = const()[name = string("op_3616"), val = tensor([1, 8, 1, 512])]; + tensor q_43 = reshape(shape = var_3616, x = var_3611)[name = string("q_43")]; + tensor var_3618_cast_fp16 = mul(x = q_43, y = cos_f)[name = string("op_3618_cast_fp16")]; + tensor var_3619_split_sizes_0 = const()[name = string("op_3619_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3619_axis_0 = const()[name = string("op_3619_axis_0"), val = int32(-1)]; + tensor var_3619_0, tensor var_3619_1 = split(axis = var_3619_axis_0, split_sizes = var_3619_split_sizes_0, x = q_43)[name = string("op_3619")]; + fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; + tensor var_3621 = mul(x = var_3619_1, y = const_62_promoted)[name = string("op_3621")]; + int32 var_3623 = const()[name = string("op_3623"), val = int32(-1)]; + bool var_3624_interleave_0 = const()[name = string("op_3624_interleave_0"), val = bool(false)]; + tensor var_3624 = concat(axis = var_3623, interleave = var_3624_interleave_0, values = (var_3621, var_3619_0))[name = string("op_3624")]; + tensor var_3625_cast_fp16 = mul(x = var_3624, y = sin_f)[name = string("op_3625_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_3618_cast_fp16, y = var_3625_cast_fp16)[name = string("q_47_cast_fp16")]; + string var_3638_pad_type_0 = const()[name = 
string("op_3638_pad_type_0"), val = string("valid")]; + tensor var_3638_strides_0 = const()[name = string("op_3638_strides_0"), val = tensor([1, 1])]; + tensor var_3638_pad_0 = const()[name = string("op_3638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3638_dilations_0 = const()[name = string("op_3638_dilations_0"), val = tensor([1, 1])]; + int32 var_3638_groups_0 = const()[name = string("op_3638_groups_0"), val = int32(1)]; + tensor var_3638 = conv(dilations = var_3638_dilations_0, groups = var_3638_groups_0, pad = var_3638_pad_0, pad_type = var_3638_pad_type_0, strides = var_3638_strides_0, weight = layers_5_self_attn_k_proj_weight_palettized, x = var_3559_cast_fp16)[name = string("op_3638")]; + tensor var_3643 = const()[name = string("op_3643"), val = tensor([1, 2, 512, 1])]; + tensor var_3644 = reshape(shape = var_3643, x = var_3638)[name = string("op_3644")]; + tensor var_3649 = const()[name = string("op_3649"), val = tensor([0, 1, 3, 2])]; + string var_3666_pad_type_0 = const()[name = string("op_3666_pad_type_0"), val = string("valid")]; + tensor var_3666_strides_0 = const()[name = string("op_3666_strides_0"), val = tensor([1, 1])]; + tensor var_3666_pad_0 = const()[name = string("op_3666_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3666_dilations_0 = const()[name = string("op_3666_dilations_0"), val = tensor([1, 1])]; + int32 var_3666_groups_0 = const()[name = string("op_3666_groups_0"), val = int32(1)]; + tensor var_3666 = conv(dilations = var_3666_dilations_0, groups = var_3666_groups_0, pad = var_3666_pad_0, pad_type = var_3666_pad_type_0, strides = var_3666_strides_0, weight = layers_5_self_attn_v_proj_weight_palettized, x = var_3559_cast_fp16)[name = string("op_3666")]; + tensor var_3671 = const()[name = string("op_3671"), val = tensor([1, 2, 512, 1])]; + tensor var_3672 = reshape(shape = var_3671, x = var_3666)[name = string("op_3672")]; + tensor var_3677 = const()[name = string("op_3677"), val = tensor([0, 1, 3, 2])]; + tensor var_3687 = 
const()[name = string("op_3687"), val = tensor([1, 2, 512])]; + tensor var_3650 = transpose(perm = var_3649, x = var_3644)[name = string("transpose_123")]; + tensor x_103 = reshape(shape = var_3687, x = var_3650)[name = string("x_103")]; + int32 var_3693 = const()[name = string("op_3693"), val = int32(-1)]; + fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; + tensor var_3695 = mul(x = x_103, y = const_63_promoted)[name = string("op_3695")]; + bool input_157_interleave_0 = const()[name = string("input_157_interleave_0"), val = bool(false)]; + tensor input_157 = concat(axis = var_3693, interleave = input_157_interleave_0, values = (x_103, var_3695))[name = string("input_157")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_3690_to_fp16 = const()[name = string("op_3690_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_3690_to_fp16, x = input_157)[name = string("normed_149_cast_fp16")]; + tensor var_3700_split_sizes_0 = const()[name = string("op_3700_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3700_axis_0 = const()[name = string("op_3700_axis_0"), val = int32(-1)]; + tensor var_3700_0, tensor var_3700_1 = split(axis = var_3700_axis_0, split_sizes = var_3700_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_3700")]; + tensor var_3702 = mul(x = var_3700_0, y = layers_5_self_attn_k_norm_weight)[name = string("op_3702")]; + tensor var_3707 = const()[name = string("op_3707"), val = tensor([1, 2, 1, 512])]; + tensor q_45 = reshape(shape = var_3707, x = var_3702)[name = string("q_45")]; + fp16 var_3709_promoted = const()[name = string("op_3709_promoted"), val = fp16(0x1p+1)]; + tensor var_3678 = transpose(perm = var_3677, x = var_3672)[name = string("transpose_122")]; + tensor var_3710 = pow(x = var_3678, y = var_3709_promoted)[name = string("op_3710")]; + tensor var_3715_axes_0 = 
const()[name = string("op_3715_axes_0"), val = tensor([-1])]; + bool var_3715_keep_dims_0 = const()[name = string("op_3715_keep_dims_0"), val = bool(true)]; + tensor var_3715 = reduce_mean(axes = var_3715_axes_0, keep_dims = var_3715_keep_dims_0, x = var_3710)[name = string("op_3715")]; + fp16 var_3717_to_fp16 = const()[name = string("op_3717_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_11_cast_fp16 = add(x = var_3715, y = var_3717_to_fp16)[name = string("mean_sq_11_cast_fp16")]; + fp32 var_3719_epsilon_0 = const()[name = string("op_3719_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3719_cast_fp16 = rsqrt(epsilon = var_3719_epsilon_0, x = mean_sq_11_cast_fp16)[name = string("op_3719_cast_fp16")]; + tensor v_1_cast_fp16 = mul(x = var_3678, y = var_3719_cast_fp16)[name = string("v_1_cast_fp16")]; + tensor var_3721_cast_fp16 = mul(x = q_45, y = cos_f)[name = string("op_3721_cast_fp16")]; + tensor var_3722_split_sizes_0 = const()[name = string("op_3722_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3722_axis_0 = const()[name = string("op_3722_axis_0"), val = int32(-1)]; + tensor var_3722_0, tensor var_3722_1 = split(axis = var_3722_axis_0, split_sizes = var_3722_split_sizes_0, x = q_45)[name = string("op_3722")]; + fp16 const_64_promoted = const()[name = string("const_64_promoted"), val = fp16(-0x1p+0)]; + tensor var_3724 = mul(x = var_3722_1, y = const_64_promoted)[name = string("op_3724")]; + int32 var_3726 = const()[name = string("op_3726"), val = int32(-1)]; + bool var_3727_interleave_0 = const()[name = string("op_3727_interleave_0"), val = bool(false)]; + tensor var_3727 = concat(axis = var_3726, interleave = var_3727_interleave_0, values = (var_3724, var_3722_0))[name = string("op_3727")]; + tensor var_3728_cast_fp16 = mul(x = var_3727, y = sin_f)[name = string("op_3728_cast_fp16")]; + tensor k_13_cast_fp16 = add(x = var_3721_cast_fp16, y = var_3728_cast_fp16)[name = string("k_13_cast_fp16")]; + fp16 var_3731_promoted_to_fp16 = 
const()[name = string("op_3731_promoted_to_fp16"), val = fp16(0x1p+0)]; + tensor var_3733_cast_fp16 = sub(x = var_3731_promoted_to_fp16, y = update_mask)[name = string("op_3733_cast_fp16")]; + tensor var_3734_cast_fp16 = mul(x = K_full_slot_1_cast_fp16, y = var_3733_cast_fp16)[name = string("op_3734_cast_fp16")]; + tensor var_3735_reps_0 = const()[name = string("op_3735_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_3735_cast_fp16 = tile(reps = var_3735_reps_0, x = k_13_cast_fp16)[name = string("op_3735_cast_fp16")]; + tensor var_3736_cast_fp16 = mul(x = var_3735_cast_fp16, y = update_mask)[name = string("op_3736_cast_fp16")]; + tensor K_full_out_1_cast_fp16 = add(x = var_3734_cast_fp16, y = var_3736_cast_fp16)[name = string("K_full_out_1_cast_fp16")]; + tensor var_3742_cast_fp16 = mul(x = V_full_slot_1_cast_fp16, y = var_3733_cast_fp16)[name = string("op_3742_cast_fp16")]; + tensor var_3743_reps_0 = const()[name = string("op_3743_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_3743_cast_fp16 = tile(reps = var_3743_reps_0, x = v_1_cast_fp16)[name = string("op_3743_cast_fp16")]; + tensor var_3744_cast_fp16 = mul(x = var_3743_cast_fp16, y = update_mask)[name = string("op_3744_cast_fp16")]; + tensor V_full_out_1_cast_fp16 = add(x = var_3742_cast_fp16, y = var_3744_cast_fp16)[name = string("V_full_out_1_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = K_full_out_1_cast_fp16)[name = string("transpose_121")]; + tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_20, x = tile_10_cast_fp16)[name = 
string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_120")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_21, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = V_full_out_1_cast_fp16)[name = string("transpose_119")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_22, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_118")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_23, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = 
const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_117")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_47_cast_fp16, y = transpose_53_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_107_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_107_cast_fp16)[name = string("reduce_max_5")]; + tensor var_3786 = sub(x = x_107_cast_fp16, y = reduce_max_5)[name = string("op_3786")]; + tensor var_3792 = exp(x = var_3786)[name = string("op_3792")]; + tensor var_3802_axes_0 = const()[name = string("op_3802_axes_0"), val = tensor([-1])]; + bool var_3802_keep_dims_0 = const()[name = string("op_3802_keep_dims_0"), val = bool(true)]; + tensor var_3802 = reduce_sum(axes = var_3802_axes_0, keep_dims = var_3802_keep_dims_0, x = var_3792)[name = string("op_3802")]; + tensor var_3808_cast_fp16 = real_div(x = var_3792, y = var_3802)[name = string("op_3808_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_116")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_3808_cast_fp16, y = 
V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_3819 = const()[name = string("op_3819"), val = tensor([0, 2, 1, 3])]; + tensor var_3826 = const()[name = string("op_3826"), val = tensor([1, 1, -1])]; + tensor var_3820_cast_fp16 = transpose(perm = var_3819, x = attn_output_31_cast_fp16)[name = string("transpose_115")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_3826, x = var_3820_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_3831 = const()[name = string("op_3831"), val = tensor([0, 2, 1])]; + string var_3847_pad_type_0 = const()[name = string("op_3847_pad_type_0"), val = string("valid")]; + int32 var_3847_groups_0 = const()[name = string("op_3847_groups_0"), val = int32(1)]; + tensor var_3847_strides_0 = const()[name = string("op_3847_strides_0"), val = tensor([1])]; + tensor var_3847_pad_0 = const()[name = string("op_3847_pad_0"), val = tensor([0, 0])]; + tensor var_3847_dilations_0 = const()[name = string("op_3847_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546132672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551375616))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3832_cast_fp16 = transpose(perm = var_3831, x = attn_output_33_cast_fp16)[name = string("transpose_114")]; + tensor var_3847_cast_fp16 = conv(dilations = var_3847_dilations_0, groups = var_3847_groups_0, pad = var_3847_pad_0, pad_type = var_3847_pad_type_0, strides = var_3847_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_3832_cast_fp16)[name = string("op_3847_cast_fp16")]; + tensor var_3851 = const()[name = string("op_3851"), val = tensor([0, 2, 1])]; + int32 var_3857 = const()[name = string("op_3857"), val = int32(-1)]; + fp16 const_65_promoted_to_fp16 = 
const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_111_cast_fp16 = transpose(perm = var_3851, x = var_3847_cast_fp16)[name = string("transpose_113")]; + tensor var_3859_cast_fp16 = mul(x = x_111_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_3859_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_3857, interleave = input_161_interleave_0, values = (x_111_cast_fp16, var_3859_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_3854_to_fp16, x = input_161_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_3864_split_sizes_0 = const()[name = string("op_3864_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3864_axis_0 = const()[name = string("op_3864_axis_0"), val = int32(-1)]; + tensor var_3864_cast_fp16_0, tensor var_3864_cast_fp16_1 = split(axis = var_3864_axis_0, split_sizes = var_3864_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_3864_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551378240)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_3864_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_113_cast_fp16 = add(x = x_99_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_113_cast_fp16")]; + int32 var_3873 = const()[name = string("op_3873"), val = int32(-1)]; + fp16 const_66_promoted_to_fp16 = const()[name = 
string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3875_cast_fp16 = mul(x = x_113_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_3875_cast_fp16")]; + bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; + tensor input_163_cast_fp16 = concat(axis = var_3873, interleave = input_163_interleave_0, values = (x_113_cast_fp16, var_3875_cast_fp16))[name = string("input_163_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_3870_to_fp16 = const()[name = string("op_3870_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_3870_to_fp16, x = input_163_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_3880_split_sizes_0 = const()[name = string("op_3880_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3880_axis_0 = const()[name = string("op_3880_axis_0"), val = int32(-1)]; + tensor var_3880_cast_fp16_0, tensor var_3880_cast_fp16_1 = split(axis = var_3880_axis_0, split_sizes = var_3880_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_3880_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551383424)))]; + tensor h_33_cast_fp16 = mul(x = var_3880_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_3891 = const()[name = string("op_3891"), val = tensor([0, 2, 1])]; + tensor input_165_axes_0 = const()[name = string("input_165_axes_0"), val = tensor([2])]; + tensor var_3892 = transpose(perm = var_3891, x = h_33_cast_fp16)[name = string("transpose_112")]; + tensor input_165 = expand_dims(axes = input_165_axes_0, x = var_3892)[name = string("input_165")]; + string 
gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_165)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_165)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_167 = mul(x = gate_23, y = up_11)[name = string("input_167")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_167)[name = string("mlp_out_11")]; + tensor var_3932_axes_0 = const()[name = string("op_3932_axes_0"), val = tensor([2])]; + tensor var_3932 = squeeze(axes = var_3932_axes_0, x = mlp_out_11)[name = string("op_3932")]; + tensor var_3936 = const()[name = string("op_3936"), val = tensor([0, 2, 1])]; + int32 var_3942 = const()[name = string("op_3942"), val = int32(-1)]; + fp16 const_67_promoted = const()[name = string("const_67_promoted"), val = fp16(-0x1p+0)]; + tensor x_115 = transpose(perm = var_3936, x = var_3932)[name = string("transpose_111")]; + tensor var_3944 = mul(x = x_115, y = const_67_promoted)[name = string("op_3944")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169 = concat(axis = var_3942, interleave = input_169_interleave_0, values = (x_115, var_3944))[name = string("input_169")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_3939_to_fp16 = const()[name = string("op_3939_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3939_to_fp16, x = input_169)[name = string("normed_161_cast_fp16")]; + tensor var_3949_split_sizes_0 = const()[name = string("op_3949_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3949_axis_0 = const()[name = string("op_3949_axis_0"), val = int32(-1)]; + tensor var_3949_0, tensor var_3949_1 = split(axis = var_3949_axis_0, split_sizes = var_3949_split_sizes_0, x = normed_161_cast_fp16)[name = 
string("op_3949")]; + tensor hidden_states_53 = mul(x = var_3949_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_113_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 4352])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 4608])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_3977 = const()[name = string("op_3977"), val = tensor([0, 2, 1])]; + tensor input_171_axes_0 = const()[name = string("input_171_axes_0"), val = tensor([2])]; + tensor var_3978 = transpose(perm = var_3977, x = hidden_states_55_cast_fp16)[name = string("transpose_110")]; + tensor input_171 = expand_dims(axes = input_171_axes_0, x = var_3978)[name = string("input_171")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = 
input_171)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_3997 = const()[name = string("op_3997"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_3998_cast_fp16 = transpose(perm = var_3997, x = per_layer_slice_11_cast_fp16)[name = string("transpose_109")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_3998_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_173_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_173_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551388608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551716352))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = 
input_173_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_4014_axes_0 = const()[name = string("op_4014_axes_0"), val = tensor([2])]; + tensor var_4014_cast_fp16 = squeeze(axes = var_4014_axes_0, x = gated_35_cast_fp16)[name = string("op_4014_cast_fp16")]; + tensor var_4018 = const()[name = string("op_4018"), val = tensor([0, 2, 1])]; + int32 var_4024 = const()[name = string("op_4024"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_117_cast_fp16 = transpose(perm = var_4018, x = var_4014_cast_fp16)[name = string("transpose_108")]; + tensor var_4026_cast_fp16 = mul(x = x_117_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_4026_cast_fp16")]; + bool input_175_interleave_0 = const()[name = string("input_175_interleave_0"), val = bool(false)]; + tensor input_175_cast_fp16 = concat(axis = var_4024, interleave = input_175_interleave_0, values = (x_117_cast_fp16, var_4026_cast_fp16))[name = string("input_175_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_4021_to_fp16 = const()[name = string("op_4021_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_4021_to_fp16, x = input_175_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_4031_split_sizes_0 = const()[name = string("op_4031_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4031_axis_0 = const()[name = string("op_4031_axis_0"), val = int32(-1)]; + tensor var_4031_cast_fp16_0, tensor var_4031_cast_fp16_1 = split(axis = var_4031_axis_0, split_sizes = var_4031_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_4031_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(551718976)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_4031_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = tensor([0x1.b2p-2])]; + tensor x_119_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_69_promoted_to_fp16)[name = string("x_119_cast_fp16")]; + tensor var_4043_axes_0 = const()[name = string("op_4043_axes_0"), val = tensor([0])]; + tensor var_4043_cast_fp16 = squeeze(axes = var_4043_axes_0, x = K_full_out_1_cast_fp16)[name = string("op_4043_cast_fp16")]; + tensor var_4045_axes_0 = const()[name = string("op_4045_axes_0"), val = tensor([0])]; + tensor var_4045_cast_fp16 = squeeze(axes = var_4045_axes_0, x = V_full_out_1_cast_fp16)[name = string("op_4045_cast_fp16")]; + tensor var_4048_begin_0 = const()[name = string("op_4048_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4048_end_0 = const()[name = string("op_4048_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4048_end_mask_0 = const()[name = string("op_4048_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4048_squeeze_mask_0 = const()[name = string("op_4048_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4048_cast_fp16 = slice_by_index(begin = var_4048_begin_0, end = var_4048_end_0, end_mask = var_4048_end_mask_0, squeeze_mask = var_4048_squeeze_mask_0, x = K_sliding_in)[name = string("op_4048_cast_fp16")]; + tensor K_sliding_slot_11_axes_0 = const()[name = string("K_sliding_slot_11_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_11_cast_fp16 = expand_dims(axes = K_sliding_slot_11_axes_0, x = var_4048_cast_fp16)[name = 
string("K_sliding_slot_11_cast_fp16")]; + tensor var_4053_begin_0 = const()[name = string("op_4053_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4053_end_0 = const()[name = string("op_4053_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4053_end_mask_0 = const()[name = string("op_4053_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4053_squeeze_mask_0 = const()[name = string("op_4053_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4053_cast_fp16 = slice_by_index(begin = var_4053_begin_0, end = var_4053_end_0, end_mask = var_4053_end_mask_0, squeeze_mask = var_4053_squeeze_mask_0, x = V_sliding_in)[name = string("op_4053_cast_fp16")]; + tensor V_sliding_slot_11_axes_0 = const()[name = string("V_sliding_slot_11_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_11_cast_fp16 = expand_dims(axes = V_sliding_slot_11_axes_0, x = var_4053_cast_fp16)[name = string("V_sliding_slot_11_cast_fp16")]; + int32 var_4060 = const()[name = string("op_4060"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4062_cast_fp16 = mul(x = x_119_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_4062_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_4060, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_4062_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_4057_to_fp16 = const()[name = string("op_4057_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_4057_to_fp16, x = input_177_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_4067_split_sizes_0 = const()[name = string("op_4067_split_sizes_0"), val = tensor([2560, 
2560])]; + int32 var_4067_axis_0 = const()[name = string("op_4067_axis_0"), val = int32(-1)]; + tensor var_4067_cast_fp16_0, tensor var_4067_cast_fp16_1 = split(axis = var_4067_axis_0, split_sizes = var_4067_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_4067_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551724160)))]; + tensor h_37_cast_fp16 = mul(x = var_4067_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_4073 = const()[name = string("op_4073"), val = tensor([0, 2, 1])]; + tensor var_4076_axes_0 = const()[name = string("op_4076_axes_0"), val = tensor([2])]; + tensor var_4074_cast_fp16 = transpose(perm = var_4073, x = h_37_cast_fp16)[name = string("transpose_107")]; + tensor var_4076_cast_fp16 = expand_dims(axes = var_4076_axes_0, x = var_4074_cast_fp16)[name = string("op_4076_cast_fp16")]; + string var_4092_pad_type_0 = const()[name = string("op_4092_pad_type_0"), val = string("valid")]; + tensor var_4092_strides_0 = const()[name = string("op_4092_strides_0"), val = tensor([1, 1])]; + tensor var_4092_pad_0 = const()[name = string("op_4092_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4092_dilations_0 = const()[name = string("op_4092_dilations_0"), val = tensor([1, 1])]; + int32 var_4092_groups_0 = const()[name = string("op_4092_groups_0"), val = int32(1)]; + tensor var_4092 = conv(dilations = var_4092_dilations_0, groups = var_4092_groups_0, pad = var_4092_pad_0, pad_type = var_4092_pad_type_0, strides = var_4092_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_4076_cast_fp16)[name = string("op_4092")]; + tensor var_4097 = const()[name = string("op_4097"), val = tensor([1, 8, 256, 1])]; + tensor var_4098 = reshape(shape = var_4097, x = var_4092)[name = 
string("op_4098")]; + tensor var_4103 = const()[name = string("op_4103"), val = tensor([0, 1, 3, 2])]; + tensor var_4113 = const()[name = string("op_4113"), val = tensor([1, 8, 256])]; + tensor var_4104 = transpose(perm = var_4103, x = var_4098)[name = string("transpose_106")]; + tensor x_121 = reshape(shape = var_4113, x = var_4104)[name = string("x_121")]; + int32 var_4119 = const()[name = string("op_4119"), val = int32(-1)]; + fp16 const_71_promoted = const()[name = string("const_71_promoted"), val = fp16(-0x1p+0)]; + tensor var_4121 = mul(x = x_121, y = const_71_promoted)[name = string("op_4121")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181 = concat(axis = var_4119, interleave = input_181_interleave_0, values = (x_121, var_4121))[name = string("input_181")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_4116_to_fp16 = const()[name = string("op_4116_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_4116_to_fp16, x = input_181)[name = string("normed_173_cast_fp16")]; + tensor var_4126_split_sizes_0 = const()[name = string("op_4126_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4126_axis_0 = const()[name = string("op_4126_axis_0"), val = int32(-1)]; + tensor var_4126_0, tensor var_4126_1 = split(axis = var_4126_axis_0, split_sizes = var_4126_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_4126")]; + tensor var_4128 = mul(x = var_4126_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_4128")]; + tensor var_4133 = const()[name = string("op_4133"), val = tensor([1, 8, 1, 256])]; + tensor q_51 = reshape(shape = var_4133, x = var_4128)[name = string("q_51")]; + tensor var_4135_cast_fp16 = mul(x = q_51, y = cos_s)[name = string("op_4135_cast_fp16")]; + tensor var_4136_split_sizes_0 = const()[name = string("op_4136_split_sizes_0"), val = 
tensor([128, 128])]; + int32 var_4136_axis_0 = const()[name = string("op_4136_axis_0"), val = int32(-1)]; + tensor var_4136_0, tensor var_4136_1 = split(axis = var_4136_axis_0, split_sizes = var_4136_split_sizes_0, x = q_51)[name = string("op_4136")]; + fp16 const_72_promoted = const()[name = string("const_72_promoted"), val = fp16(-0x1p+0)]; + tensor var_4138 = mul(x = var_4136_1, y = const_72_promoted)[name = string("op_4138")]; + int32 var_4140 = const()[name = string("op_4140"), val = int32(-1)]; + bool var_4141_interleave_0 = const()[name = string("op_4141_interleave_0"), val = bool(false)]; + tensor var_4141 = concat(axis = var_4140, interleave = var_4141_interleave_0, values = (var_4138, var_4136_0))[name = string("op_4141")]; + tensor var_4142_cast_fp16 = mul(x = var_4141, y = sin_s)[name = string("op_4142_cast_fp16")]; + tensor q_55_cast_fp16 = add(x = var_4135_cast_fp16, y = var_4142_cast_fp16)[name = string("q_55_cast_fp16")]; + string var_4155_pad_type_0 = const()[name = string("op_4155_pad_type_0"), val = string("valid")]; + tensor var_4155_strides_0 = const()[name = string("op_4155_strides_0"), val = tensor([1, 1])]; + tensor var_4155_pad_0 = const()[name = string("op_4155_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4155_dilations_0 = const()[name = string("op_4155_dilations_0"), val = tensor([1, 1])]; + int32 var_4155_groups_0 = const()[name = string("op_4155_groups_0"), val = int32(1)]; + tensor var_4155 = conv(dilations = var_4155_dilations_0, groups = var_4155_groups_0, pad = var_4155_pad_0, pad_type = var_4155_pad_type_0, strides = var_4155_strides_0, weight = layers_6_self_attn_k_proj_weight_palettized, x = var_4076_cast_fp16)[name = string("op_4155")]; + tensor var_4160 = const()[name = string("op_4160"), val = tensor([1, 2, 256, 1])]; + tensor var_4161 = reshape(shape = var_4160, x = var_4155)[name = string("op_4161")]; + tensor var_4166 = const()[name = string("op_4166"), val = tensor([0, 1, 3, 2])]; + string var_4183_pad_type_0 = 
const()[name = string("op_4183_pad_type_0"), val = string("valid")]; + tensor var_4183_strides_0 = const()[name = string("op_4183_strides_0"), val = tensor([1, 1])]; + tensor var_4183_pad_0 = const()[name = string("op_4183_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4183_dilations_0 = const()[name = string("op_4183_dilations_0"), val = tensor([1, 1])]; + int32 var_4183_groups_0 = const()[name = string("op_4183_groups_0"), val = int32(1)]; + tensor var_4183 = conv(dilations = var_4183_dilations_0, groups = var_4183_groups_0, pad = var_4183_pad_0, pad_type = var_4183_pad_type_0, strides = var_4183_strides_0, weight = layers_6_self_attn_v_proj_weight_palettized, x = var_4076_cast_fp16)[name = string("op_4183")]; + tensor var_4188 = const()[name = string("op_4188"), val = tensor([1, 2, 256, 1])]; + tensor var_4189 = reshape(shape = var_4188, x = var_4183)[name = string("op_4189")]; + tensor var_4194 = const()[name = string("op_4194"), val = tensor([0, 1, 3, 2])]; + tensor var_4204 = const()[name = string("op_4204"), val = tensor([1, 2, 256])]; + tensor var_4167 = transpose(perm = var_4166, x = var_4161)[name = string("transpose_105")]; + tensor x_123 = reshape(shape = var_4204, x = var_4167)[name = string("x_123")]; + int32 var_4210 = const()[name = string("op_4210"), val = int32(-1)]; + fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; + tensor var_4212 = mul(x = x_123, y = const_73_promoted)[name = string("op_4212")]; + bool input_183_interleave_0 = const()[name = string("input_183_interleave_0"), val = bool(false)]; + tensor input_183 = concat(axis = var_4210, interleave = input_183_interleave_0, values = (x_123, var_4212))[name = string("input_183")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_4207_to_fp16 = const()[name = string("op_4207_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = 
var_4207_to_fp16, x = input_183)[name = string("normed_177_cast_fp16")]; + tensor var_4217_split_sizes_0 = const()[name = string("op_4217_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4217_axis_0 = const()[name = string("op_4217_axis_0"), val = int32(-1)]; + tensor var_4217_0, tensor var_4217_1 = split(axis = var_4217_axis_0, split_sizes = var_4217_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_4217")]; + tensor var_4219 = mul(x = var_4217_0, y = layers_6_self_attn_k_norm_weight)[name = string("op_4219")]; + tensor var_4224 = const()[name = string("op_4224"), val = tensor([1, 2, 1, 256])]; + tensor q_53 = reshape(shape = var_4224, x = var_4219)[name = string("q_53")]; + fp16 var_4226_promoted = const()[name = string("op_4226_promoted"), val = fp16(0x1p+1)]; + tensor var_4195 = transpose(perm = var_4194, x = var_4189)[name = string("transpose_104")]; + tensor var_4227 = pow(x = var_4195, y = var_4226_promoted)[name = string("op_4227")]; + tensor var_4232_axes_0 = const()[name = string("op_4232_axes_0"), val = tensor([-1])]; + bool var_4232_keep_dims_0 = const()[name = string("op_4232_keep_dims_0"), val = bool(true)]; + tensor var_4232 = reduce_mean(axes = var_4232_axes_0, keep_dims = var_4232_keep_dims_0, x = var_4227)[name = string("op_4232")]; + fp16 var_4234_to_fp16 = const()[name = string("op_4234_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_13_cast_fp16 = add(x = var_4232, y = var_4234_to_fp16)[name = string("mean_sq_13_cast_fp16")]; + fp32 var_4236_epsilon_0 = const()[name = string("op_4236_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4236_cast_fp16 = rsqrt(epsilon = var_4236_epsilon_0, x = mean_sq_13_cast_fp16)[name = string("op_4236_cast_fp16")]; + tensor input_187_cast_fp16 = mul(x = var_4195, y = var_4236_cast_fp16)[name = string("input_187_cast_fp16")]; + tensor var_4238_cast_fp16 = mul(x = q_53, y = cos_s)[name = string("op_4238_cast_fp16")]; + tensor var_4239_split_sizes_0 = const()[name = 
string("op_4239_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4239_axis_0 = const()[name = string("op_4239_axis_0"), val = int32(-1)]; + tensor var_4239_0, tensor var_4239_1 = split(axis = var_4239_axis_0, split_sizes = var_4239_split_sizes_0, x = q_53)[name = string("op_4239")]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor var_4241 = mul(x = var_4239_1, y = const_74_promoted)[name = string("op_4241")]; + int32 var_4243 = const()[name = string("op_4243"), val = int32(-1)]; + bool var_4244_interleave_0 = const()[name = string("op_4244_interleave_0"), val = bool(false)]; + tensor var_4244 = concat(axis = var_4243, interleave = var_4244_interleave_0, values = (var_4241, var_4239_0))[name = string("op_4244")]; + tensor var_4245_cast_fp16 = mul(x = var_4244, y = sin_s)[name = string("op_4245_cast_fp16")]; + tensor input_185_cast_fp16 = add(x = var_4238_cast_fp16, y = var_4245_cast_fp16)[name = string("input_185_cast_fp16")]; + tensor k_padded_11_pad_0 = const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; + fp16 const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_11_cast_fp16 = pad(constant_val = const_75_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_185_cast_fp16)[name = string("k_padded_11_cast_fp16")]; + tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; + fp16 const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_11_cast_fp16 = pad(constant_val = const_76_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = input_187_cast_fp16)[name = string("v_padded_11_cast_fp16")]; + 
tensor var_4274_begin_0 = const()[name = string("op_4274_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4274_end_0 = const()[name = string("op_4274_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4274_end_mask_0 = const()[name = string("op_4274_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4274_cast_fp16 = slice_by_index(begin = var_4274_begin_0, end = var_4274_end_0, end_mask = var_4274_end_mask_0, x = K_sliding_slot_11_cast_fp16)[name = string("op_4274_cast_fp16")]; + int32 var_4281 = const()[name = string("op_4281"), val = int32(2)]; + bool K_sliding_out_11_interleave_0 = const()[name = string("K_sliding_out_11_interleave_0"), val = bool(false)]; + tensor K_sliding_out_11_cast_fp16 = concat(axis = var_4281, interleave = K_sliding_out_11_interleave_0, values = (var_4274_cast_fp16, k_padded_11_cast_fp16))[name = string("K_sliding_out_11_cast_fp16")]; + tensor var_4297_begin_0 = const()[name = string("op_4297_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4297_end_0 = const()[name = string("op_4297_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4297_end_mask_0 = const()[name = string("op_4297_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4297_cast_fp16 = slice_by_index(begin = var_4297_begin_0, end = var_4297_end_0, end_mask = var_4297_end_mask_0, x = V_sliding_slot_11_cast_fp16)[name = string("op_4297_cast_fp16")]; + int32 var_4304 = const()[name = string("op_4304"), val = int32(2)]; + bool V_sliding_out_11_interleave_0 = const()[name = string("V_sliding_out_11_interleave_0"), val = bool(false)]; + tensor V_sliding_out_11_cast_fp16 = concat(axis = var_4304, interleave = V_sliding_out_11_interleave_0, values = (var_4297_cast_fp16, v_padded_11_cast_fp16))[name = string("V_sliding_out_11_cast_fp16")]; + tensor K_for_attn_13_begin_0 = const()[name = string("K_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_13_end_0 = const()[name = string("K_for_attn_13_end_0"), val = 
tensor([1, 2, 512, 256])]; + tensor K_for_attn_13_end_mask_0 = const()[name = string("K_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_13_cast_fp16 = slice_by_index(begin = K_for_attn_13_begin_0, end = K_for_attn_13_end_0, end_mask = K_for_attn_13_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("K_for_attn_13_cast_fp16")]; + tensor V_for_attn_13_begin_0 = const()[name = string("V_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_13_end_0 = const()[name = string("V_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_13_end_mask_0 = const()[name = string("V_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_13_cast_fp16 = slice_by_index(begin = V_for_attn_13_begin_0, end = V_for_attn_13_end_0, end_mask = V_for_attn_13_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("V_for_attn_13_cast_fp16")]; + tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_12_reps_0 = const()[name = string("tile_12_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = K_for_attn_13_cast_fp16)[name = string("transpose_103")]; + tensor tile_12_cast_fp16 = tile(reps = tile_12_reps_0, x = transpose_24_cast_fp16)[name = string("tile_12_cast_fp16")]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_24, x = tile_12_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_24_cast_fp16)[name = string("transpose_102")]; + tensor reshape_25_cast_fp16 = reshape(shape = 
concat_25, x = transpose_25_cast_fp16)[name = string("reshape_25_cast_fp16")]; + tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_13_reps_0 = const()[name = string("tile_13_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = V_for_attn_13_cast_fp16)[name = string("transpose_101")]; + tensor tile_13_cast_fp16 = tile(reps = tile_13_reps_0, x = transpose_26_cast_fp16)[name = string("tile_13_cast_fp16")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_26, x = tile_13_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_100")]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_27, x = transpose_27_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor V_expanded_13_perm_0 = const()[name = string("V_expanded_13_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor transpose_54_cast_fp16 = transpose(perm = transpose_54_perm_0, x = reshape_25_cast_fp16)[name = string("transpose_99")]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_55_cast_fp16, y = transpose_54_cast_fp16)[name = 
string("attn_weights_25_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_127_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_127_cast_fp16)[name = string("reduce_max_6")]; + tensor var_4345 = sub(x = x_127_cast_fp16, y = reduce_max_6)[name = string("op_4345")]; + tensor var_4351 = exp(x = var_4345)[name = string("op_4351")]; + tensor var_4361_axes_0 = const()[name = string("op_4361_axes_0"), val = tensor([-1])]; + bool var_4361_keep_dims_0 = const()[name = string("op_4361_keep_dims_0"), val = bool(true)]; + tensor var_4361 = reduce_sum(axes = var_4361_axes_0, keep_dims = var_4361_keep_dims_0, x = var_4351)[name = string("op_4361")]; + tensor var_4367_cast_fp16 = real_div(x = var_4351, y = var_4361)[name = string("op_4367_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor V_expanded_13_cast_fp16 = transpose(perm = V_expanded_13_perm_0, x = reshape_27_cast_fp16)[name = string("transpose_98")]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_4367_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_4378 = const()[name = string("op_4378"), val = tensor([0, 2, 1, 3])]; + tensor var_4385 = const()[name = string("op_4385"), val = tensor([1, 1, -1])]; + tensor var_4379_cast_fp16 = transpose(perm = var_4378, x = attn_output_37_cast_fp16)[name = string("transpose_97")]; + tensor attn_output_39_cast_fp16 = 
reshape(shape = var_4385, x = var_4379_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_4390 = const()[name = string("op_4390"), val = tensor([0, 2, 1])]; + string var_4406_pad_type_0 = const()[name = string("op_4406_pad_type_0"), val = string("valid")]; + int32 var_4406_groups_0 = const()[name = string("op_4406_groups_0"), val = int32(1)]; + tensor var_4406_strides_0 = const()[name = string("op_4406_strides_0"), val = tensor([1])]; + tensor var_4406_pad_0 = const()[name = string("op_4406_pad_0"), val = tensor([0, 0])]; + tensor var_4406_dilations_0 = const()[name = string("op_4406_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551729344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554350848))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4391_cast_fp16 = transpose(perm = var_4390, x = attn_output_39_cast_fp16)[name = string("transpose_96")]; + tensor var_4406_cast_fp16 = conv(dilations = var_4406_dilations_0, groups = var_4406_groups_0, pad = var_4406_pad_0, pad_type = var_4406_pad_type_0, strides = var_4406_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4391_cast_fp16)[name = string("op_4406_cast_fp16")]; + tensor var_4410 = const()[name = string("op_4410"), val = tensor([0, 2, 1])]; + int32 var_4416 = const()[name = string("op_4416"), val = int32(-1)]; + fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_131_cast_fp16 = transpose(perm = var_4410, x = var_4406_cast_fp16)[name = string("transpose_95")]; + tensor var_4418_cast_fp16 = mul(x = x_131_cast_fp16, y = const_77_promoted_to_fp16)[name = string("op_4418_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = 
bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_4416, interleave = input_191_interleave_0, values = (x_131_cast_fp16, var_4418_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_4413_to_fp16 = const()[name = string("op_4413_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_4413_to_fp16, x = input_191_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_4423_split_sizes_0 = const()[name = string("op_4423_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4423_axis_0 = const()[name = string("op_4423_axis_0"), val = int32(-1)]; + tensor var_4423_cast_fp16_0, tensor var_4423_cast_fp16_1 = split(axis = var_4423_axis_0, split_sizes = var_4423_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_4423_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554353472)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_4423_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_119_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_133_cast_fp16")]; + int32 var_4432 = const()[name = string("op_4432"), val = int32(-1)]; + fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4434_cast_fp16 = mul(x = x_133_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4434_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_4432, interleave = input_193_interleave_0, values = (x_133_cast_fp16, 
var_4434_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_4429_to_fp16 = const()[name = string("op_4429_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_4429_to_fp16, x = input_193_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor var_4439_split_sizes_0 = const()[name = string("op_4439_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4439_axis_0 = const()[name = string("op_4439_axis_0"), val = int32(-1)]; + tensor var_4439_cast_fp16_0, tensor var_4439_cast_fp16_1 = split(axis = var_4439_axis_0, split_sizes = var_4439_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_4439_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554358656)))]; + tensor h_39_cast_fp16 = mul(x = var_4439_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_4450 = const()[name = string("op_4450"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_4451 = transpose(perm = var_4450, x = h_39_cast_fp16)[name = string("transpose_94")]; + tensor input_195 = expand_dims(axes = input_195_axes_0, x = var_4451)[name = string("input_195")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = 
const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_195)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_195)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_197 = mul(x = gate_27, y = up_13)[name = string("input_197")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = 
layers_6_mlp_down_proj_weight_palettized, x = input_197)[name = string("mlp_out_13")]; + tensor var_4491_axes_0 = const()[name = string("op_4491_axes_0"), val = tensor([2])]; + tensor var_4491 = squeeze(axes = var_4491_axes_0, x = mlp_out_13)[name = string("op_4491")]; + tensor var_4495 = const()[name = string("op_4495"), val = tensor([0, 2, 1])]; + int32 var_4501 = const()[name = string("op_4501"), val = int32(-1)]; + fp16 const_79_promoted = const()[name = string("const_79_promoted"), val = fp16(-0x1p+0)]; + tensor x_135 = transpose(perm = var_4495, x = var_4491)[name = string("transpose_93")]; + tensor var_4503 = mul(x = x_135, y = const_79_promoted)[name = string("op_4503")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199 = concat(axis = var_4501, interleave = input_199_interleave_0, values = (x_135, var_4503))[name = string("input_199")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_4498_to_fp16 = const()[name = string("op_4498_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_4498_to_fp16, x = input_199)[name = string("normed_189_cast_fp16")]; + tensor var_4508_split_sizes_0 = const()[name = string("op_4508_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4508_axis_0 = const()[name = string("op_4508_axis_0"), val = int32(-1)]; + tensor var_4508_0, tensor var_4508_1 = split(axis = var_4508_axis_0, split_sizes = var_4508_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_4508")]; + tensor hidden_states_63 = mul(x = var_4508_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 
4608])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 4864])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_4536 = const()[name = string("op_4536"), val = tensor([0, 2, 1])]; + tensor input_201_axes_0 = const()[name = string("input_201_axes_0"), val = tensor([2])]; + tensor var_4537 = transpose(perm = var_4536, x = hidden_states_65_cast_fp16)[name = string("transpose_92")]; + tensor input_201 = expand_dims(axes = input_201_axes_0, x = var_4537)[name = string("input_201")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_201)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_4556 = const()[name = string("op_4556"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = 
string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_4557_cast_fp16 = transpose(perm = var_4556, x = per_layer_slice_13_cast_fp16)[name = string("transpose_91")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_4557_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_203_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_203_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554363840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554691584))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_203_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_4573_axes_0 = const()[name = string("op_4573_axes_0"), val = tensor([2])]; + tensor var_4573_cast_fp16 = squeeze(axes = var_4573_axes_0, x = gated_41_cast_fp16)[name = string("op_4573_cast_fp16")]; + tensor var_4577 = const()[name = string("op_4577"), val = tensor([0, 2, 1])]; + int32 var_4583 = 
const()[name = string("op_4583"), val = int32(-1)]; + fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_137_cast_fp16 = transpose(perm = var_4577, x = var_4573_cast_fp16)[name = string("transpose_90")]; + tensor var_4585_cast_fp16 = mul(x = x_137_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_4585_cast_fp16")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205_cast_fp16 = concat(axis = var_4583, interleave = input_205_interleave_0, values = (x_137_cast_fp16, var_4585_cast_fp16))[name = string("input_205_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_4580_to_fp16 = const()[name = string("op_4580_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_4580_to_fp16, x = input_205_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_4590_split_sizes_0 = const()[name = string("op_4590_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4590_axis_0 = const()[name = string("op_4590_axis_0"), val = int32(-1)]; + tensor var_4590_cast_fp16_0, tensor var_4590_cast_fp16_1 = split(axis = var_4590_axis_0, split_sizes = var_4590_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_4590_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554694208)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_4590_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + 
tensor const_81_promoted_to_fp16 = const()[name = string("const_81_promoted_to_fp16"), val = tensor([0x1.16p-1])]; + tensor x_139_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_81_promoted_to_fp16)[name = string("x_139_cast_fp16")]; + tensor var_4602_axes_0 = const()[name = string("op_4602_axes_0"), val = tensor([0])]; + tensor var_4602_cast_fp16 = squeeze(axes = var_4602_axes_0, x = K_sliding_out_11_cast_fp16)[name = string("op_4602_cast_fp16")]; + tensor var_4604_axes_0 = const()[name = string("op_4604_axes_0"), val = tensor([0])]; + tensor var_4604_cast_fp16 = squeeze(axes = var_4604_axes_0, x = V_sliding_out_11_cast_fp16)[name = string("op_4604_cast_fp16")]; + tensor var_4607_begin_0 = const()[name = string("op_4607_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4607_end_0 = const()[name = string("op_4607_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_4607_end_mask_0 = const()[name = string("op_4607_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4607_squeeze_mask_0 = const()[name = string("op_4607_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4607_cast_fp16 = slice_by_index(begin = var_4607_begin_0, end = var_4607_end_0, end_mask = var_4607_end_mask_0, squeeze_mask = var_4607_squeeze_mask_0, x = K_sliding_in)[name = string("op_4607_cast_fp16")]; + tensor K_sliding_slot_13_axes_0 = const()[name = string("K_sliding_slot_13_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_13_cast_fp16 = expand_dims(axes = K_sliding_slot_13_axes_0, x = var_4607_cast_fp16)[name = string("K_sliding_slot_13_cast_fp16")]; + tensor var_4612_begin_0 = const()[name = string("op_4612_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4612_end_0 = const()[name = string("op_4612_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_4612_end_mask_0 = const()[name = string("op_4612_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4612_squeeze_mask_0 = const()[name = 
string("op_4612_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4612_cast_fp16 = slice_by_index(begin = var_4612_begin_0, end = var_4612_end_0, end_mask = var_4612_end_mask_0, squeeze_mask = var_4612_squeeze_mask_0, x = V_sliding_in)[name = string("op_4612_cast_fp16")]; + tensor V_sliding_slot_13_axes_0 = const()[name = string("V_sliding_slot_13_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_13_cast_fp16 = expand_dims(axes = V_sliding_slot_13_axes_0, x = var_4612_cast_fp16)[name = string("V_sliding_slot_13_cast_fp16")]; + int32 var_4619 = const()[name = string("op_4619"), val = int32(-1)]; + fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4621_cast_fp16 = mul(x = x_139_cast_fp16, y = const_82_promoted_to_fp16)[name = string("op_4621_cast_fp16")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207_cast_fp16 = concat(axis = var_4619, interleave = input_207_interleave_0, values = (x_139_cast_fp16, var_4621_cast_fp16))[name = string("input_207_cast_fp16")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_4616_to_fp16 = const()[name = string("op_4616_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_4616_to_fp16, x = input_207_cast_fp16)[name = string("normed_197_cast_fp16")]; + tensor var_4626_split_sizes_0 = const()[name = string("op_4626_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4626_axis_0 = const()[name = string("op_4626_axis_0"), val = int32(-1)]; + tensor var_4626_cast_fp16_0, tensor var_4626_cast_fp16_1 = split(axis = var_4626_axis_0, split_sizes = var_4626_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_4626_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554699392)))]; + tensor h_43_cast_fp16 = mul(x = var_4626_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_4632 = const()[name = string("op_4632"), val = tensor([0, 2, 1])]; + tensor var_4635_axes_0 = const()[name = string("op_4635_axes_0"), val = tensor([2])]; + tensor var_4633_cast_fp16 = transpose(perm = var_4632, x = h_43_cast_fp16)[name = string("transpose_89")]; + tensor var_4635_cast_fp16 = expand_dims(axes = var_4635_axes_0, x = var_4633_cast_fp16)[name = string("op_4635_cast_fp16")]; + string var_4651_pad_type_0 = const()[name = string("op_4651_pad_type_0"), val = string("valid")]; + tensor var_4651_strides_0 = const()[name = string("op_4651_strides_0"), val = tensor([1, 1])]; + tensor var_4651_pad_0 = const()[name = string("op_4651_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4651_dilations_0 = const()[name = string("op_4651_dilations_0"), val = tensor([1, 1])]; + int32 var_4651_groups_0 = const()[name = string("op_4651_groups_0"), val = int32(1)]; + tensor var_4651 = conv(dilations = var_4651_dilations_0, groups = var_4651_groups_0, pad = var_4651_pad_0, pad_type = var_4651_pad_type_0, strides = var_4651_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_4635_cast_fp16)[name = string("op_4651")]; + tensor var_4656 = const()[name = string("op_4656"), val = tensor([1, 8, 256, 1])]; + tensor var_4657 = reshape(shape = var_4656, x = var_4651)[name = string("op_4657")]; + tensor var_4662 = const()[name = string("op_4662"), val = tensor([0, 1, 3, 2])]; + tensor var_4672 = const()[name = string("op_4672"), val = tensor([1, 8, 256])]; + tensor var_4663 = transpose(perm = var_4662, x = var_4657)[name = string("transpose_88")]; + tensor x_141 = reshape(shape = var_4672, x = var_4663)[name = string("x_141")]; + int32 var_4678 = 
const()[name = string("op_4678"), val = int32(-1)]; + fp16 const_83_promoted = const()[name = string("const_83_promoted"), val = fp16(-0x1p+0)]; + tensor var_4680 = mul(x = x_141, y = const_83_promoted)[name = string("op_4680")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211 = concat(axis = var_4678, interleave = input_211_interleave_0, values = (x_141, var_4680))[name = string("input_211")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_4675_to_fp16 = const()[name = string("op_4675_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_4675_to_fp16, x = input_211)[name = string("normed_201_cast_fp16")]; + tensor var_4685_split_sizes_0 = const()[name = string("op_4685_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4685_axis_0 = const()[name = string("op_4685_axis_0"), val = int32(-1)]; + tensor var_4685_0, tensor var_4685_1 = split(axis = var_4685_axis_0, split_sizes = var_4685_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_4685")]; + tensor var_4687 = mul(x = var_4685_0, y = layers_7_self_attn_q_norm_weight)[name = string("op_4687")]; + tensor var_4692 = const()[name = string("op_4692"), val = tensor([1, 8, 1, 256])]; + tensor q_59 = reshape(shape = var_4692, x = var_4687)[name = string("q_59")]; + tensor var_4694_cast_fp16 = mul(x = q_59, y = cos_s)[name = string("op_4694_cast_fp16")]; + tensor var_4695_split_sizes_0 = const()[name = string("op_4695_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4695_axis_0 = const()[name = string("op_4695_axis_0"), val = int32(-1)]; + tensor var_4695_0, tensor var_4695_1 = split(axis = var_4695_axis_0, split_sizes = var_4695_split_sizes_0, x = q_59)[name = string("op_4695")]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_4697 = mul(x = var_4695_1, 
y = const_84_promoted)[name = string("op_4697")]; + int32 var_4699 = const()[name = string("op_4699"), val = int32(-1)]; + bool var_4700_interleave_0 = const()[name = string("op_4700_interleave_0"), val = bool(false)]; + tensor var_4700 = concat(axis = var_4699, interleave = var_4700_interleave_0, values = (var_4697, var_4695_0))[name = string("op_4700")]; + tensor var_4701_cast_fp16 = mul(x = var_4700, y = sin_s)[name = string("op_4701_cast_fp16")]; + tensor q_63_cast_fp16 = add(x = var_4694_cast_fp16, y = var_4701_cast_fp16)[name = string("q_63_cast_fp16")]; + string var_4714_pad_type_0 = const()[name = string("op_4714_pad_type_0"), val = string("valid")]; + tensor var_4714_strides_0 = const()[name = string("op_4714_strides_0"), val = tensor([1, 1])]; + tensor var_4714_pad_0 = const()[name = string("op_4714_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4714_dilations_0 = const()[name = string("op_4714_dilations_0"), val = tensor([1, 1])]; + int32 var_4714_groups_0 = const()[name = string("op_4714_groups_0"), val = int32(1)]; + tensor var_4714 = conv(dilations = var_4714_dilations_0, groups = var_4714_groups_0, pad = var_4714_pad_0, pad_type = var_4714_pad_type_0, strides = var_4714_strides_0, weight = layers_7_self_attn_k_proj_weight_palettized, x = var_4635_cast_fp16)[name = string("op_4714")]; + tensor var_4719 = const()[name = string("op_4719"), val = tensor([1, 2, 256, 1])]; + tensor var_4720 = reshape(shape = var_4719, x = var_4714)[name = string("op_4720")]; + tensor var_4725 = const()[name = string("op_4725"), val = tensor([0, 1, 3, 2])]; + string var_4742_pad_type_0 = const()[name = string("op_4742_pad_type_0"), val = string("valid")]; + tensor var_4742_strides_0 = const()[name = string("op_4742_strides_0"), val = tensor([1, 1])]; + tensor var_4742_pad_0 = const()[name = string("op_4742_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4742_dilations_0 = const()[name = string("op_4742_dilations_0"), val = tensor([1, 1])]; + int32 var_4742_groups_0 = 
const()[name = string("op_4742_groups_0"), val = int32(1)]; + tensor var_4742 = conv(dilations = var_4742_dilations_0, groups = var_4742_groups_0, pad = var_4742_pad_0, pad_type = var_4742_pad_type_0, strides = var_4742_strides_0, weight = layers_7_self_attn_v_proj_weight_palettized, x = var_4635_cast_fp16)[name = string("op_4742")]; + tensor var_4747 = const()[name = string("op_4747"), val = tensor([1, 2, 256, 1])]; + tensor var_4748 = reshape(shape = var_4747, x = var_4742)[name = string("op_4748")]; + tensor var_4753 = const()[name = string("op_4753"), val = tensor([0, 1, 3, 2])]; + tensor var_4763 = const()[name = string("op_4763"), val = tensor([1, 2, 256])]; + tensor var_4726 = transpose(perm = var_4725, x = var_4720)[name = string("transpose_87")]; + tensor x_143 = reshape(shape = var_4763, x = var_4726)[name = string("x_143")]; + int32 var_4769 = const()[name = string("op_4769"), val = int32(-1)]; + fp16 const_85_promoted = const()[name = string("const_85_promoted"), val = fp16(-0x1p+0)]; + tensor var_4771 = mul(x = x_143, y = const_85_promoted)[name = string("op_4771")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213 = concat(axis = var_4769, interleave = input_213_interleave_0, values = (x_143, var_4771))[name = string("input_213")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_4766_to_fp16 = const()[name = string("op_4766_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_4766_to_fp16, x = input_213)[name = string("normed_205_cast_fp16")]; + tensor var_4776_split_sizes_0 = const()[name = string("op_4776_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4776_axis_0 = const()[name = string("op_4776_axis_0"), val = int32(-1)]; + tensor var_4776_0, tensor var_4776_1 = split(axis = var_4776_axis_0, split_sizes = var_4776_split_sizes_0, x = 
normed_205_cast_fp16)[name = string("op_4776")]; + tensor var_4778 = mul(x = var_4776_0, y = layers_7_self_attn_k_norm_weight)[name = string("op_4778")]; + tensor var_4783 = const()[name = string("op_4783"), val = tensor([1, 2, 1, 256])]; + tensor q_61 = reshape(shape = var_4783, x = var_4778)[name = string("q_61")]; + fp16 var_4785_promoted = const()[name = string("op_4785_promoted"), val = fp16(0x1p+1)]; + tensor var_4754 = transpose(perm = var_4753, x = var_4748)[name = string("transpose_86")]; + tensor var_4786 = pow(x = var_4754, y = var_4785_promoted)[name = string("op_4786")]; + tensor var_4791_axes_0 = const()[name = string("op_4791_axes_0"), val = tensor([-1])]; + bool var_4791_keep_dims_0 = const()[name = string("op_4791_keep_dims_0"), val = bool(true)]; + tensor var_4791 = reduce_mean(axes = var_4791_axes_0, keep_dims = var_4791_keep_dims_0, x = var_4786)[name = string("op_4791")]; + fp16 var_4793_to_fp16 = const()[name = string("op_4793_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_15_cast_fp16 = add(x = var_4791, y = var_4793_to_fp16)[name = string("mean_sq_15_cast_fp16")]; + fp32 var_4795_epsilon_0 = const()[name = string("op_4795_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4795_cast_fp16 = rsqrt(epsilon = var_4795_epsilon_0, x = mean_sq_15_cast_fp16)[name = string("op_4795_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_4754, y = var_4795_cast_fp16)[name = string("input_217_cast_fp16")]; + tensor var_4797_cast_fp16 = mul(x = q_61, y = cos_s)[name = string("op_4797_cast_fp16")]; + tensor var_4798_split_sizes_0 = const()[name = string("op_4798_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4798_axis_0 = const()[name = string("op_4798_axis_0"), val = int32(-1)]; + tensor var_4798_0, tensor var_4798_1 = split(axis = var_4798_axis_0, split_sizes = var_4798_split_sizes_0, x = q_61)[name = string("op_4798")]; + fp16 const_86_promoted = const()[name = string("const_86_promoted"), val = fp16(-0x1p+0)]; + tensor var_4800 = 
mul(x = var_4798_1, y = const_86_promoted)[name = string("op_4800")]; + int32 var_4802 = const()[name = string("op_4802"), val = int32(-1)]; + bool var_4803_interleave_0 = const()[name = string("op_4803_interleave_0"), val = bool(false)]; + tensor var_4803 = concat(axis = var_4802, interleave = var_4803_interleave_0, values = (var_4800, var_4798_0))[name = string("op_4803")]; + tensor var_4804_cast_fp16 = mul(x = var_4803, y = sin_s)[name = string("op_4804_cast_fp16")]; + tensor input_215_cast_fp16 = add(x = var_4797_cast_fp16, y = var_4804_cast_fp16)[name = string("input_215_cast_fp16")]; + tensor k_padded_13_pad_0 = const()[name = string("k_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_13_mode_0 = const()[name = string("k_padded_13_mode_0"), val = string("constant")]; + fp16 const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_13_cast_fp16 = pad(constant_val = const_87_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_215_cast_fp16)[name = string("k_padded_13_cast_fp16")]; + tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; + fp16 const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_13_cast_fp16 = pad(constant_val = const_88_to_fp16, mode = v_padded_13_mode_0, pad = v_padded_13_pad_0, x = input_217_cast_fp16)[name = string("v_padded_13_cast_fp16")]; + tensor var_4833_begin_0 = const()[name = string("op_4833_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4833_end_0 = const()[name = string("op_4833_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4833_end_mask_0 = const()[name = string("op_4833_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4833_cast_fp16 = slice_by_index(begin = var_4833_begin_0, end = var_4833_end_0, 
end_mask = var_4833_end_mask_0, x = K_sliding_slot_13_cast_fp16)[name = string("op_4833_cast_fp16")]; + int32 var_4840 = const()[name = string("op_4840"), val = int32(2)]; + bool K_sliding_out_13_interleave_0 = const()[name = string("K_sliding_out_13_interleave_0"), val = bool(false)]; + tensor K_sliding_out_13_cast_fp16 = concat(axis = var_4840, interleave = K_sliding_out_13_interleave_0, values = (var_4833_cast_fp16, k_padded_13_cast_fp16))[name = string("K_sliding_out_13_cast_fp16")]; + tensor var_4856_begin_0 = const()[name = string("op_4856_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4856_end_0 = const()[name = string("op_4856_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4856_end_mask_0 = const()[name = string("op_4856_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4856_cast_fp16 = slice_by_index(begin = var_4856_begin_0, end = var_4856_end_0, end_mask = var_4856_end_mask_0, x = V_sliding_slot_13_cast_fp16)[name = string("op_4856_cast_fp16")]; + int32 var_4863 = const()[name = string("op_4863"), val = int32(2)]; + bool V_sliding_out_13_interleave_0 = const()[name = string("V_sliding_out_13_interleave_0"), val = bool(false)]; + tensor V_sliding_out_13_cast_fp16 = concat(axis = var_4863, interleave = V_sliding_out_13_interleave_0, values = (var_4856_cast_fp16, v_padded_13_cast_fp16))[name = string("V_sliding_out_13_cast_fp16")]; + tensor K_for_attn_15_begin_0 = const()[name = string("K_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_15_end_0 = const()[name = string("K_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_15_end_mask_0 = const()[name = string("K_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_15_cast_fp16 = slice_by_index(begin = K_for_attn_15_begin_0, end = K_for_attn_15_end_0, end_mask = K_for_attn_15_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("K_for_attn_15_cast_fp16")]; + tensor V_for_attn_15_begin_0 = 
const()[name = string("V_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_15_end_0 = const()[name = string("V_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_15_end_mask_0 = const()[name = string("V_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_15_cast_fp16 = slice_by_index(begin = V_for_attn_15_begin_0, end = V_for_attn_15_end_0, end_mask = V_for_attn_15_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("V_for_attn_15_cast_fp16")]; + tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_14_reps_0 = const()[name = string("tile_14_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = K_for_attn_15_cast_fp16)[name = string("transpose_85")]; + tensor tile_14_cast_fp16 = tile(reps = tile_14_reps_0, x = transpose_28_cast_fp16)[name = string("tile_14_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_28, x = tile_14_cast_fp16)[name = string("reshape_28_cast_fp16")]; + tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = reshape_28_cast_fp16)[name = string("transpose_84")]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_29, x = transpose_29_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_15_reps_0 = const()[name = string("tile_15_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor 
transpose_30_cast_fp16 = transpose(perm = transpose_30_perm_0, x = V_for_attn_15_cast_fp16)[name = string("transpose_83")]; + tensor tile_15_cast_fp16 = tile(reps = tile_15_reps_0, x = transpose_30_cast_fp16)[name = string("tile_15_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_30, x = tile_15_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_31_cast_fp16 = transpose(perm = transpose_31_perm_0, x = reshape_30_cast_fp16)[name = string("transpose_82")]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_31, x = transpose_31_cast_fp16)[name = string("reshape_31_cast_fp16")]; + tensor V_expanded_15_perm_0 = const()[name = string("V_expanded_15_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor transpose_55_cast_fp16 = transpose(perm = transpose_55_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_81")]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_63_cast_fp16, y = transpose_55_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_147_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = 
reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_7")]; + tensor var_4904 = sub(x = x_147_cast_fp16, y = reduce_max_7)[name = string("op_4904")]; + tensor var_4910 = exp(x = var_4904)[name = string("op_4910")]; + tensor var_4920_axes_0 = const()[name = string("op_4920_axes_0"), val = tensor([-1])]; + bool var_4920_keep_dims_0 = const()[name = string("op_4920_keep_dims_0"), val = bool(true)]; + tensor var_4920 = reduce_sum(axes = var_4920_axes_0, keep_dims = var_4920_keep_dims_0, x = var_4910)[name = string("op_4920")]; + tensor var_4926_cast_fp16 = real_div(x = var_4910, y = var_4920)[name = string("op_4926_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor V_expanded_15_cast_fp16 = transpose(perm = V_expanded_15_perm_0, x = reshape_31_cast_fp16)[name = string("transpose_80")]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_4926_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_4937 = const()[name = string("op_4937"), val = tensor([0, 2, 1, 3])]; + tensor var_4944 = const()[name = string("op_4944"), val = tensor([1, 1, -1])]; + tensor var_4938_cast_fp16 = transpose(perm = var_4937, x = attn_output_43_cast_fp16)[name = string("transpose_79")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_4944, x = var_4938_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_4949 = const()[name = string("op_4949"), val = tensor([0, 2, 1])]; + string var_4965_pad_type_0 = const()[name = string("op_4965_pad_type_0"), val = string("valid")]; + int32 var_4965_groups_0 = const()[name = string("op_4965_groups_0"), val = int32(1)]; + tensor var_4965_strides_0 = const()[name = 
string("op_4965_strides_0"), val = tensor([1])]; + tensor var_4965_pad_0 = const()[name = string("op_4965_pad_0"), val = tensor([0, 0])]; + tensor var_4965_dilations_0 = const()[name = string("op_4965_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554704576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557326080))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4950_cast_fp16 = transpose(perm = var_4949, x = attn_output_45_cast_fp16)[name = string("transpose_78")]; + tensor var_4965_cast_fp16 = conv(dilations = var_4965_dilations_0, groups = var_4965_groups_0, pad = var_4965_pad_0, pad_type = var_4965_pad_type_0, strides = var_4965_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_4950_cast_fp16)[name = string("op_4965_cast_fp16")]; + tensor var_4969 = const()[name = string("op_4969"), val = tensor([0, 2, 1])]; + int32 var_4975 = const()[name = string("op_4975"), val = int32(-1)]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_151_cast_fp16 = transpose(perm = var_4969, x = var_4965_cast_fp16)[name = string("transpose_77")]; + tensor var_4977_cast_fp16 = mul(x = x_151_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_4977_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_4975, interleave = input_221_interleave_0, values = (x_151_cast_fp16, var_4977_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_4972_to_fp16 = const()[name = string("op_4972_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = 
layer_norm(axes = normed_209_axes_0, epsilon = var_4972_to_fp16, x = input_221_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor var_4982_split_sizes_0 = const()[name = string("op_4982_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4982_axis_0 = const()[name = string("op_4982_axis_0"), val = int32(-1)]; + tensor var_4982_cast_fp16_0, tensor var_4982_cast_fp16_1 = split(axis = var_4982_axis_0, split_sizes = var_4982_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_4982_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557328704)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_4982_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_139_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_153_cast_fp16")]; + int32 var_4991 = const()[name = string("op_4991"), val = int32(-1)]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4993_cast_fp16 = mul(x = x_153_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_4993_cast_fp16")]; + bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)]; + tensor input_223_cast_fp16 = concat(axis = var_4991, interleave = input_223_interleave_0, values = (x_153_cast_fp16, var_4993_cast_fp16))[name = string("input_223_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_4988_to_fp16 = const()[name = string("op_4988_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_4988_to_fp16, x = input_223_cast_fp16)[name = string("normed_213_cast_fp16")]; + 
tensor var_4998_split_sizes_0 = const()[name = string("op_4998_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4998_axis_0 = const()[name = string("op_4998_axis_0"), val = int32(-1)]; + tensor var_4998_cast_fp16_0, tensor var_4998_cast_fp16_1 = split(axis = var_4998_axis_0, split_sizes = var_4998_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_4998_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557333888)))]; + tensor h_45_cast_fp16 = mul(x = var_4998_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_5009 = const()[name = string("op_5009"), val = tensor([0, 2, 1])]; + tensor input_225_axes_0 = const()[name = string("input_225_axes_0"), val = tensor([2])]; + tensor var_5010 = transpose(perm = var_5009, x = h_45_cast_fp16)[name = string("transpose_76")]; + tensor input_225 = expand_dims(axes = input_225_axes_0, x = var_5010)[name = string("input_225")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_225)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + 
tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_225)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_227 = mul(x = gate_31, y = up_15)[name = string("input_227")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_227)[name = string("mlp_out_15")]; + tensor var_5050_axes_0 = const()[name = string("op_5050_axes_0"), val = tensor([2])]; + tensor var_5050 = squeeze(axes = var_5050_axes_0, x = mlp_out_15)[name = string("op_5050")]; + tensor var_5054 = const()[name = string("op_5054"), val = tensor([0, 2, 1])]; + int32 var_5060 = const()[name = string("op_5060"), val = int32(-1)]; + fp16 
const_91_promoted = const()[name = string("const_91_promoted"), val = fp16(-0x1p+0)]; + tensor x_155 = transpose(perm = var_5054, x = var_5050)[name = string("transpose_75")]; + tensor var_5062 = mul(x = x_155, y = const_91_promoted)[name = string("op_5062")]; + bool input_229_interleave_0 = const()[name = string("input_229_interleave_0"), val = bool(false)]; + tensor input_229 = concat(axis = var_5060, interleave = input_229_interleave_0, values = (x_155, var_5062))[name = string("input_229")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_5057_to_fp16 = const()[name = string("op_5057_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_5057_to_fp16, x = input_229)[name = string("normed_217_cast_fp16")]; + tensor var_5067_split_sizes_0 = const()[name = string("op_5067_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5067_axis_0 = const()[name = string("op_5067_axis_0"), val = int32(-1)]; + tensor var_5067_0, tensor var_5067_1 = split(axis = var_5067_axis_0, split_sizes = var_5067_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_5067")]; + tensor hidden_states_73 = mul(x = var_5067_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 4864])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 5120])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = 
per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_5095 = const()[name = string("op_5095"), val = tensor([0, 2, 1])]; + tensor input_231_axes_0 = const()[name = string("input_231_axes_0"), val = tensor([2])]; + tensor var_5096 = transpose(perm = var_5095, x = hidden_states_75_cast_fp16)[name = string("transpose_74")]; + tensor input_231 = expand_dims(axes = input_231_axes_0, x = var_5096)[name = string("input_231")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_231)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_5115 = const()[name = string("op_5115"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_5116_cast_fp16 = transpose(perm = var_5115, x = per_layer_slice_15_cast_fp16)[name = string("transpose_73")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_5116_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_233_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = 
string("input_233_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557339072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557666816))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_233_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_5132_axes_0 = const()[name = string("op_5132_axes_0"), val = tensor([2])]; + tensor var_5132_cast_fp16 = squeeze(axes = var_5132_axes_0, x = gated_47_cast_fp16)[name = string("op_5132_cast_fp16")]; + tensor var_5136 = const()[name = string("op_5136"), val = tensor([0, 2, 1])]; + int32 var_5142 = const()[name = string("op_5142"), val = int32(-1)]; + fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_157_cast_fp16 = transpose(perm = var_5136, x = var_5132_cast_fp16)[name = string("transpose_72")]; + tensor var_5144_cast_fp16 = mul(x = x_157_cast_fp16, y = const_92_promoted_to_fp16)[name = string("op_5144_cast_fp16")]; + bool input_235_interleave_0 = const()[name = 
string("input_235_interleave_0"), val = bool(false)]; + tensor input_235_cast_fp16 = concat(axis = var_5142, interleave = input_235_interleave_0, values = (x_157_cast_fp16, var_5144_cast_fp16))[name = string("input_235_cast_fp16")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_5139_to_fp16 = const()[name = string("op_5139_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_5139_to_fp16, x = input_235_cast_fp16)[name = string("normed_221_cast_fp16")]; + tensor var_5149_split_sizes_0 = const()[name = string("op_5149_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5149_axis_0 = const()[name = string("op_5149_axis_0"), val = int32(-1)]; + tensor var_5149_cast_fp16_0, tensor var_5149_cast_fp16_1 = split(axis = var_5149_axis_0, split_sizes = var_5149_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_5149_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557669440)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_5149_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = tensor([0x1.06p-1])]; + tensor x_159_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_93_promoted_to_fp16)[name = string("x_159_cast_fp16")]; + tensor var_5161_axes_0 = const()[name = string("op_5161_axes_0"), val = tensor([0])]; + tensor var_5161_cast_fp16 = squeeze(axes = var_5161_axes_0, x = K_sliding_out_13_cast_fp16)[name = 
string("op_5161_cast_fp16")]; + tensor var_5163_axes_0 = const()[name = string("op_5163_axes_0"), val = tensor([0])]; + tensor var_5163_cast_fp16 = squeeze(axes = var_5163_axes_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5163_cast_fp16")]; + tensor var_5166_begin_0 = const()[name = string("op_5166_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5166_end_0 = const()[name = string("op_5166_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5166_end_mask_0 = const()[name = string("op_5166_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5166_squeeze_mask_0 = const()[name = string("op_5166_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5166_cast_fp16 = slice_by_index(begin = var_5166_begin_0, end = var_5166_end_0, end_mask = var_5166_end_mask_0, squeeze_mask = var_5166_squeeze_mask_0, x = K_sliding_in)[name = string("op_5166_cast_fp16")]; + tensor K_sliding_slot_15_axes_0 = const()[name = string("K_sliding_slot_15_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_15_cast_fp16 = expand_dims(axes = K_sliding_slot_15_axes_0, x = var_5166_cast_fp16)[name = string("K_sliding_slot_15_cast_fp16")]; + tensor var_5171_begin_0 = const()[name = string("op_5171_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5171_end_0 = const()[name = string("op_5171_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5171_end_mask_0 = const()[name = string("op_5171_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5171_squeeze_mask_0 = const()[name = string("op_5171_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5171_cast_fp16 = slice_by_index(begin = var_5171_begin_0, end = var_5171_end_0, end_mask = var_5171_end_mask_0, squeeze_mask = var_5171_squeeze_mask_0, x = V_sliding_in)[name = string("op_5171_cast_fp16")]; + tensor V_sliding_slot_15_axes_0 = const()[name = string("V_sliding_slot_15_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_15_cast_fp16 = 
expand_dims(axes = V_sliding_slot_15_axes_0, x = var_5171_cast_fp16)[name = string("V_sliding_slot_15_cast_fp16")]; + int32 var_5178 = const()[name = string("op_5178"), val = int32(-1)]; + fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5180_cast_fp16 = mul(x = x_159_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_5180_cast_fp16")]; + bool input_237_interleave_0 = const()[name = string("input_237_interleave_0"), val = bool(false)]; + tensor input_237_cast_fp16 = concat(axis = var_5178, interleave = input_237_interleave_0, values = (x_159_cast_fp16, var_5180_cast_fp16))[name = string("input_237_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_5175_to_fp16 = const()[name = string("op_5175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_5175_to_fp16, x = input_237_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor var_5185_split_sizes_0 = const()[name = string("op_5185_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5185_axis_0 = const()[name = string("op_5185_axis_0"), val = int32(-1)]; + tensor var_5185_cast_fp16_0, tensor var_5185_cast_fp16_1 = split(axis = var_5185_axis_0, split_sizes = var_5185_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_5185_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557674624)))]; + tensor h_49_cast_fp16 = mul(x = var_5185_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_5191 = const()[name = string("op_5191"), val = tensor([0, 2, 1])]; + tensor var_5194_axes_0 = const()[name = string("op_5194_axes_0"), val = tensor([2])]; + tensor 
var_5192_cast_fp16 = transpose(perm = var_5191, x = h_49_cast_fp16)[name = string("transpose_71")]; + tensor var_5194_cast_fp16 = expand_dims(axes = var_5194_axes_0, x = var_5192_cast_fp16)[name = string("op_5194_cast_fp16")]; + string var_5210_pad_type_0 = const()[name = string("op_5210_pad_type_0"), val = string("valid")]; + tensor var_5210_strides_0 = const()[name = string("op_5210_strides_0"), val = tensor([1, 1])]; + tensor var_5210_pad_0 = const()[name = string("op_5210_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5210_dilations_0 = const()[name = string("op_5210_dilations_0"), val = tensor([1, 1])]; + int32 var_5210_groups_0 = const()[name = string("op_5210_groups_0"), val = int32(1)]; + tensor var_5210 = conv(dilations = var_5210_dilations_0, groups = var_5210_groups_0, pad = var_5210_pad_0, pad_type = var_5210_pad_type_0, strides = var_5210_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_5194_cast_fp16)[name = string("op_5210")]; + tensor var_5215 = const()[name = string("op_5215"), val = tensor([1, 8, 256, 1])]; + tensor var_5216 = reshape(shape = var_5215, x = var_5210)[name = string("op_5216")]; + tensor var_5221 = const()[name = string("op_5221"), val = tensor([0, 1, 3, 2])]; + tensor var_5231 = const()[name = string("op_5231"), val = tensor([1, 8, 256])]; + tensor var_5222 = transpose(perm = var_5221, x = var_5216)[name = string("transpose_70")]; + tensor x_161 = reshape(shape = var_5231, x = var_5222)[name = string("x_161")]; + int32 var_5237 = const()[name = string("op_5237"), val = int32(-1)]; + fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; + tensor var_5239 = mul(x = x_161, y = const_95_promoted)[name = string("op_5239")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241 = concat(axis = var_5237, interleave = input_241_interleave_0, values = (x_161, var_5239))[name = string("input_241")]; + tensor 
normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_5234_to_fp16 = const()[name = string("op_5234_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_5234_to_fp16, x = input_241)[name = string("normed_229_cast_fp16")]; + tensor var_5244_split_sizes_0 = const()[name = string("op_5244_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5244_axis_0 = const()[name = string("op_5244_axis_0"), val = int32(-1)]; + tensor var_5244_0, tensor var_5244_1 = split(axis = var_5244_axis_0, split_sizes = var_5244_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_5244")]; + tensor var_5251 = const()[name = string("op_5251"), val = tensor([1, 8, 1, 256])]; + tensor q_67 = reshape(shape = var_5251, x = var_5244_0)[name = string("q_67")]; + tensor var_5253_cast_fp16 = mul(x = q_67, y = cos_s)[name = string("op_5253_cast_fp16")]; + tensor var_5254_split_sizes_0 = const()[name = string("op_5254_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5254_axis_0 = const()[name = string("op_5254_axis_0"), val = int32(-1)]; + tensor var_5254_0, tensor var_5254_1 = split(axis = var_5254_axis_0, split_sizes = var_5254_split_sizes_0, x = q_67)[name = string("op_5254")]; + fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; + tensor var_5256 = mul(x = var_5254_1, y = const_96_promoted)[name = string("op_5256")]; + int32 var_5258 = const()[name = string("op_5258"), val = int32(-1)]; + bool var_5259_interleave_0 = const()[name = string("op_5259_interleave_0"), val = bool(false)]; + tensor var_5259 = concat(axis = var_5258, interleave = var_5259_interleave_0, values = (var_5256, var_5254_0))[name = string("op_5259")]; + tensor var_5260_cast_fp16 = mul(x = var_5259, y = sin_s)[name = string("op_5260_cast_fp16")]; + tensor q_71_cast_fp16 = add(x = var_5253_cast_fp16, y = var_5260_cast_fp16)[name = string("q_71_cast_fp16")]; + string 
var_5273_pad_type_0 = const()[name = string("op_5273_pad_type_0"), val = string("valid")]; + tensor var_5273_strides_0 = const()[name = string("op_5273_strides_0"), val = tensor([1, 1])]; + tensor var_5273_pad_0 = const()[name = string("op_5273_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5273_dilations_0 = const()[name = string("op_5273_dilations_0"), val = tensor([1, 1])]; + int32 var_5273_groups_0 = const()[name = string("op_5273_groups_0"), val = int32(1)]; + tensor var_5273 = conv(dilations = var_5273_dilations_0, groups = var_5273_groups_0, pad = var_5273_pad_0, pad_type = var_5273_pad_type_0, strides = var_5273_strides_0, weight = layers_8_self_attn_k_proj_weight_palettized, x = var_5194_cast_fp16)[name = string("op_5273")]; + tensor var_5278 = const()[name = string("op_5278"), val = tensor([1, 2, 256, 1])]; + tensor var_5279 = reshape(shape = var_5278, x = var_5273)[name = string("op_5279")]; + tensor var_5284 = const()[name = string("op_5284"), val = tensor([0, 1, 3, 2])]; + string var_5301_pad_type_0 = const()[name = string("op_5301_pad_type_0"), val = string("valid")]; + tensor var_5301_strides_0 = const()[name = string("op_5301_strides_0"), val = tensor([1, 1])]; + tensor var_5301_pad_0 = const()[name = string("op_5301_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5301_dilations_0 = const()[name = string("op_5301_dilations_0"), val = tensor([1, 1])]; + int32 var_5301_groups_0 = const()[name = string("op_5301_groups_0"), val = int32(1)]; + tensor var_5301 = conv(dilations = var_5301_dilations_0, groups = var_5301_groups_0, pad = var_5301_pad_0, pad_type = var_5301_pad_type_0, strides = var_5301_strides_0, weight = layers_8_self_attn_v_proj_weight_palettized, x = var_5194_cast_fp16)[name = string("op_5301")]; + tensor var_5306 = const()[name = string("op_5306"), val = tensor([1, 2, 256, 1])]; + tensor var_5307 = reshape(shape = var_5306, x = var_5301)[name = string("op_5307")]; + tensor var_5312 = const()[name = string("op_5312"), val = 
tensor([0, 1, 3, 2])]; + tensor var_5322 = const()[name = string("op_5322"), val = tensor([1, 2, 256])]; + tensor var_5285 = transpose(perm = var_5284, x = var_5279)[name = string("transpose_69")]; + tensor x_163 = reshape(shape = var_5322, x = var_5285)[name = string("x_163")]; + int32 var_5328 = const()[name = string("op_5328"), val = int32(-1)]; + fp16 const_97_promoted = const()[name = string("const_97_promoted"), val = fp16(-0x1p+0)]; + tensor var_5330 = mul(x = x_163, y = const_97_promoted)[name = string("op_5330")]; + bool input_243_interleave_0 = const()[name = string("input_243_interleave_0"), val = bool(false)]; + tensor input_243 = concat(axis = var_5328, interleave = input_243_interleave_0, values = (x_163, var_5330))[name = string("input_243")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_5325_to_fp16 = const()[name = string("op_5325_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_5325_to_fp16, x = input_243)[name = string("normed_233_cast_fp16")]; + tensor var_5335_split_sizes_0 = const()[name = string("op_5335_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5335_axis_0 = const()[name = string("op_5335_axis_0"), val = int32(-1)]; + tensor var_5335_0, tensor var_5335_1 = split(axis = var_5335_axis_0, split_sizes = var_5335_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_5335")]; + tensor var_5337 = mul(x = var_5335_0, y = layers_8_self_attn_k_norm_weight)[name = string("op_5337")]; + tensor var_5342 = const()[name = string("op_5342"), val = tensor([1, 2, 1, 256])]; + tensor q_69 = reshape(shape = var_5342, x = var_5337)[name = string("q_69")]; + fp16 var_5344_promoted = const()[name = string("op_5344_promoted"), val = fp16(0x1p+1)]; + tensor var_5313 = transpose(perm = var_5312, x = var_5307)[name = string("transpose_68")]; + tensor var_5345 = pow(x = var_5313, y = var_5344_promoted)[name = 
string("op_5345")]; + tensor var_5350_axes_0 = const()[name = string("op_5350_axes_0"), val = tensor([-1])]; + bool var_5350_keep_dims_0 = const()[name = string("op_5350_keep_dims_0"), val = bool(true)]; + tensor var_5350 = reduce_mean(axes = var_5350_axes_0, keep_dims = var_5350_keep_dims_0, x = var_5345)[name = string("op_5350")]; + fp16 var_5352_to_fp16 = const()[name = string("op_5352_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_17_cast_fp16 = add(x = var_5350, y = var_5352_to_fp16)[name = string("mean_sq_17_cast_fp16")]; + fp32 var_5354_epsilon_0 = const()[name = string("op_5354_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5354_cast_fp16 = rsqrt(epsilon = var_5354_epsilon_0, x = mean_sq_17_cast_fp16)[name = string("op_5354_cast_fp16")]; + tensor input_247_cast_fp16 = mul(x = var_5313, y = var_5354_cast_fp16)[name = string("input_247_cast_fp16")]; + tensor var_5356_cast_fp16 = mul(x = q_69, y = cos_s)[name = string("op_5356_cast_fp16")]; + tensor var_5357_split_sizes_0 = const()[name = string("op_5357_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5357_axis_0 = const()[name = string("op_5357_axis_0"), val = int32(-1)]; + tensor var_5357_0, tensor var_5357_1 = split(axis = var_5357_axis_0, split_sizes = var_5357_split_sizes_0, x = q_69)[name = string("op_5357")]; + fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; + tensor var_5359 = mul(x = var_5357_1, y = const_98_promoted)[name = string("op_5359")]; + int32 var_5361 = const()[name = string("op_5361"), val = int32(-1)]; + bool var_5362_interleave_0 = const()[name = string("op_5362_interleave_0"), val = bool(false)]; + tensor var_5362 = concat(axis = var_5361, interleave = var_5362_interleave_0, values = (var_5359, var_5357_0))[name = string("op_5362")]; + tensor var_5363_cast_fp16 = mul(x = var_5362, y = sin_s)[name = string("op_5363_cast_fp16")]; + tensor input_245_cast_fp16 = add(x = var_5356_cast_fp16, y = var_5363_cast_fp16)[name = 
string("input_245_cast_fp16")]; + tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; + fp16 const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_15_cast_fp16 = pad(constant_val = const_99_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_245_cast_fp16)[name = string("k_padded_15_cast_fp16")]; + tensor v_padded_15_pad_0 = const()[name = string("v_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; + fp16 const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_15_cast_fp16 = pad(constant_val = const_100_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = input_247_cast_fp16)[name = string("v_padded_15_cast_fp16")]; + tensor var_5392_begin_0 = const()[name = string("op_5392_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5392_end_0 = const()[name = string("op_5392_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5392_end_mask_0 = const()[name = string("op_5392_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5392_cast_fp16 = slice_by_index(begin = var_5392_begin_0, end = var_5392_end_0, end_mask = var_5392_end_mask_0, x = K_sliding_slot_15_cast_fp16)[name = string("op_5392_cast_fp16")]; + int32 var_5399 = const()[name = string("op_5399"), val = int32(2)]; + bool K_sliding_out_15_interleave_0 = const()[name = string("K_sliding_out_15_interleave_0"), val = bool(false)]; + tensor K_sliding_out_15_cast_fp16 = concat(axis = var_5399, interleave = K_sliding_out_15_interleave_0, values = (var_5392_cast_fp16, k_padded_15_cast_fp16))[name = string("K_sliding_out_15_cast_fp16")]; + tensor var_5415_begin_0 = const()[name = string("op_5415_begin_0"), 
val = tensor([0, 0, 1, 0])]; + tensor var_5415_end_0 = const()[name = string("op_5415_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5415_end_mask_0 = const()[name = string("op_5415_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5415_cast_fp16 = slice_by_index(begin = var_5415_begin_0, end = var_5415_end_0, end_mask = var_5415_end_mask_0, x = V_sliding_slot_15_cast_fp16)[name = string("op_5415_cast_fp16")]; + int32 var_5422 = const()[name = string("op_5422"), val = int32(2)]; + bool V_sliding_out_15_interleave_0 = const()[name = string("V_sliding_out_15_interleave_0"), val = bool(false)]; + tensor V_sliding_out_15_cast_fp16 = concat(axis = var_5422, interleave = V_sliding_out_15_interleave_0, values = (var_5415_cast_fp16, v_padded_15_cast_fp16))[name = string("V_sliding_out_15_cast_fp16")]; + tensor K_for_attn_17_begin_0 = const()[name = string("K_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_17_end_0 = const()[name = string("K_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_17_end_mask_0 = const()[name = string("K_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_17_cast_fp16 = slice_by_index(begin = K_for_attn_17_begin_0, end = K_for_attn_17_end_0, end_mask = K_for_attn_17_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("K_for_attn_17_cast_fp16")]; + tensor V_for_attn_17_begin_0 = const()[name = string("V_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_17_end_0 = const()[name = string("V_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_17_end_mask_0 = const()[name = string("V_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_17_cast_fp16 = slice_by_index(begin = V_for_attn_17_begin_0, end = V_for_attn_17_end_0, end_mask = V_for_attn_17_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("V_for_attn_17_cast_fp16")]; + tensor transpose_32_perm_0 = 
const()[name = string("transpose_32_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_16_reps_0 = const()[name = string("tile_16_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_32_cast_fp16 = transpose(perm = transpose_32_perm_0, x = K_for_attn_17_cast_fp16)[name = string("transpose_67")]; + tensor tile_16_cast_fp16 = tile(reps = tile_16_reps_0, x = transpose_32_cast_fp16)[name = string("tile_16_cast_fp16")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_32, x = tile_16_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_33 = const()[name = string("concat_33"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_66")]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_33, x = transpose_33_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_17_reps_0 = const()[name = string("tile_17_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_34_cast_fp16 = transpose(perm = transpose_34_perm_0, x = V_for_attn_17_cast_fp16)[name = string("transpose_65")]; + tensor tile_17_cast_fp16 = tile(reps = tile_17_reps_0, x = transpose_34_cast_fp16)[name = string("tile_17_cast_fp16")]; + tensor concat_34 = const()[name = string("concat_34"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_34, x = tile_17_cast_fp16)[name = string("reshape_34_cast_fp16")]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor 
concat_35 = const()[name = string("concat_35"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_35_cast_fp16 = transpose(perm = transpose_35_perm_0, x = reshape_34_cast_fp16)[name = string("transpose_64")]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_35, x = transpose_35_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor V_expanded_17_perm_0 = const()[name = string("V_expanded_17_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = reshape_33_cast_fp16)[name = string("transpose_63")]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_71_cast_fp16, y = transpose_56_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_167_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_167_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_167_cast_fp16)[name = string("reduce_max_8")]; + tensor var_5463 = sub(x = x_167_cast_fp16, y = reduce_max_8)[name = string("op_5463")]; + tensor var_5469 = exp(x = var_5463)[name = string("op_5469")]; + tensor var_5479_axes_0 = const()[name = string("op_5479_axes_0"), val = tensor([-1])]; + bool var_5479_keep_dims_0 = const()[name = string("op_5479_keep_dims_0"), val = bool(true)]; + tensor var_5479 = reduce_sum(axes = var_5479_axes_0, keep_dims = var_5479_keep_dims_0, x = var_5469)[name = string("op_5479")]; + tensor 
var_5485_cast_fp16 = real_div(x = var_5469, y = var_5479)[name = string("op_5485_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor V_expanded_17_cast_fp16 = transpose(perm = V_expanded_17_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_62")]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_5485_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_5496 = const()[name = string("op_5496"), val = tensor([0, 2, 1, 3])]; + tensor var_5503 = const()[name = string("op_5503"), val = tensor([1, 1, -1])]; + tensor var_5497_cast_fp16 = transpose(perm = var_5496, x = attn_output_49_cast_fp16)[name = string("transpose_61")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_5503, x = var_5497_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_5508 = const()[name = string("op_5508"), val = tensor([0, 2, 1])]; + string var_5524_pad_type_0 = const()[name = string("op_5524_pad_type_0"), val = string("valid")]; + int32 var_5524_groups_0 = const()[name = string("op_5524_groups_0"), val = int32(1)]; + tensor var_5524_strides_0 = const()[name = string("op_5524_strides_0"), val = tensor([1])]; + tensor var_5524_pad_0 = const()[name = string("op_5524_pad_0"), val = tensor([0, 0])]; + tensor var_5524_dilations_0 = const()[name = string("op_5524_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557679808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560301312))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + 
tensor var_5509_cast_fp16 = transpose(perm = var_5508, x = attn_output_51_cast_fp16)[name = string("transpose_60")]; + tensor var_5524_cast_fp16 = conv(dilations = var_5524_dilations_0, groups = var_5524_groups_0, pad = var_5524_pad_0, pad_type = var_5524_pad_type_0, strides = var_5524_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5509_cast_fp16)[name = string("op_5524_cast_fp16")]; + tensor var_5528 = const()[name = string("op_5528"), val = tensor([0, 2, 1])]; + int32 var_5534 = const()[name = string("op_5534"), val = int32(-1)]; + fp16 const_101_promoted_to_fp16 = const()[name = string("const_101_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_171_cast_fp16 = transpose(perm = var_5528, x = var_5524_cast_fp16)[name = string("transpose_59")]; + tensor var_5536_cast_fp16 = mul(x = x_171_cast_fp16, y = const_101_promoted_to_fp16)[name = string("op_5536_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_5534, interleave = input_251_interleave_0, values = (x_171_cast_fp16, var_5536_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_5531_to_fp16 = const()[name = string("op_5531_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_5531_to_fp16, x = input_251_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor var_5541_split_sizes_0 = const()[name = string("op_5541_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5541_axis_0 = const()[name = string("op_5541_axis_0"), val = int32(-1)]; + tensor var_5541_cast_fp16_0, tensor var_5541_cast_fp16_1 = split(axis = var_5541_axis_0, split_sizes = var_5541_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_5541_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = 
const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560303936)))]; + tensor attn_output_53_cast_fp16 = mul(x = var_5541_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor x_173_cast_fp16 = add(x = x_159_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_173_cast_fp16")]; + int32 var_5550 = const()[name = string("op_5550"), val = int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5552_cast_fp16 = mul(x = x_173_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_5552_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_5550, interleave = input_253_interleave_0, values = (x_173_cast_fp16, var_5552_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_5547_to_fp16 = const()[name = string("op_5547_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_5547_to_fp16, x = input_253_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor var_5557_split_sizes_0 = const()[name = string("op_5557_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5557_axis_0 = const()[name = string("op_5557_axis_0"), val = int32(-1)]; + tensor var_5557_cast_fp16_0, tensor var_5557_cast_fp16_1 = split(axis = var_5557_axis_0, split_sizes = var_5557_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_5557_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(560309120)))]; + tensor h_51_cast_fp16 = mul(x = var_5557_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_5568 = const()[name = string("op_5568"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_5569 = transpose(perm = var_5568, x = h_51_cast_fp16)[name = string("transpose_58")]; + tensor input_255 = expand_dims(axes = input_255_axes_0, x = var_5569)[name = string("input_255")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_255)[name = string("gate_33")]; + string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; + tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; + tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; + int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; + tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = 
input_255)[name = string("up_17")]; + string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; + tensor input_257 = mul(x = gate_35, y = up_17)[name = string("input_257")]; + string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; + tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; + tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_257)[name = string("mlp_out_17")]; + tensor var_5609_axes_0 = const()[name = string("op_5609_axes_0"), val = tensor([2])]; + tensor var_5609 = squeeze(axes = var_5609_axes_0, x = mlp_out_17)[name = string("op_5609")]; + tensor var_5613 = const()[name = string("op_5613"), val = tensor([0, 2, 1])]; + int32 var_5619 = const()[name = string("op_5619"), val = int32(-1)]; + fp16 const_103_promoted = const()[name = string("const_103_promoted"), val = fp16(-0x1p+0)]; + tensor x_175 = transpose(perm = var_5613, x = var_5609)[name = string("transpose_57")]; + tensor var_5621 = mul(x = x_175, y = const_103_promoted)[name = string("op_5621")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259 = concat(axis = var_5619, interleave = input_259_interleave_0, values = (x_175, var_5621))[name = string("input_259")]; + tensor normed_245_axes_0 = const()[name = 
string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_5616_to_fp16 = const()[name = string("op_5616_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_5616_to_fp16, x = input_259)[name = string("normed_245_cast_fp16")]; + tensor var_5626_split_sizes_0 = const()[name = string("op_5626_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5626_axis_0 = const()[name = string("op_5626_axis_0"), val = int32(-1)]; + tensor var_5626_0, tensor var_5626_1 = split(axis = var_5626_axis_0, split_sizes = var_5626_split_sizes_0, x = normed_245_cast_fp16)[name = string("op_5626")]; + tensor hidden_states_83 = mul(x = var_5626_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_173_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor([0, 0, 5120])]; + tensor per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor([1, 1, 5376])]; + tensor per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17_cast_fp16")]; + tensor var_5654 = const()[name = string("op_5654"), val = tensor([0, 2, 1])]; + tensor input_261_axes_0 = const()[name = string("input_261_axes_0"), val = tensor([2])]; + tensor var_5655 = transpose(perm = var_5654, x = hidden_states_85_cast_fp16)[name = string("transpose_56")]; + tensor input_261 = expand_dims(axes = input_261_axes_0, x = var_5655)[name = string("input_261")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = 
string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_261)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_5674 = const()[name = string("op_5674"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor([2])]; + tensor var_5675_cast_fp16 = transpose(perm = var_5674, x = per_layer_slice_17_cast_fp16)[name = string("transpose_55")]; + tensor per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_5675_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")]; + tensor input_263_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_263_cast_fp16")]; + string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")]; + tensor gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor([1, 1])]; + tensor gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor([1, 1])]; + int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)]; + tensor 
layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560314304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560642048))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_263_cast_fp16)[name = string("gated_53_cast_fp16")]; + tensor var_5691_axes_0 = const()[name = string("op_5691_axes_0"), val = tensor([2])]; + tensor var_5691_cast_fp16 = squeeze(axes = var_5691_axes_0, x = gated_53_cast_fp16)[name = string("op_5691_cast_fp16")]; + tensor var_5695 = const()[name = string("op_5695"), val = tensor([0, 2, 1])]; + int32 var_5701 = const()[name = string("op_5701"), val = int32(-1)]; + fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_177_cast_fp16 = transpose(perm = var_5695, x = var_5691_cast_fp16)[name = string("transpose_54")]; + tensor var_5703_cast_fp16 = mul(x = x_177_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_5703_cast_fp16")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265_cast_fp16 = concat(axis = var_5701, interleave = input_265_interleave_0, values = (x_177_cast_fp16, var_5703_cast_fp16))[name = string("input_265_cast_fp16")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_5698_to_fp16 = const()[name = string("op_5698_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_5698_to_fp16, x = input_265_cast_fp16)[name 
= string("normed_249_cast_fp16")]; + tensor var_5708_split_sizes_0 = const()[name = string("op_5708_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5708_axis_0 = const()[name = string("op_5708_axis_0"), val = int32(-1)]; + tensor var_5708_cast_fp16_0, tensor var_5708_cast_fp16_1 = split(axis = var_5708_axis_0, split_sizes = var_5708_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_5708_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560644672)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_5708_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; + tensor const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = tensor([0x1.bap-2])]; + tensor x_179_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_105_promoted_to_fp16)[name = string("x_179_cast_fp16")]; + tensor var_5720_axes_0 = const()[name = string("op_5720_axes_0"), val = tensor([0])]; + tensor var_5720_cast_fp16 = squeeze(axes = var_5720_axes_0, x = K_sliding_out_15_cast_fp16)[name = string("op_5720_cast_fp16")]; + tensor var_5722_axes_0 = const()[name = string("op_5722_axes_0"), val = tensor([0])]; + tensor var_5722_cast_fp16 = squeeze(axes = var_5722_axes_0, x = V_sliding_out_15_cast_fp16)[name = string("op_5722_cast_fp16")]; + tensor var_5725_begin_0 = const()[name = string("op_5725_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5725_end_0 = const()[name = string("op_5725_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_5725_end_mask_0 = const()[name = string("op_5725_end_mask_0"), val = tensor([false, true, 
true, true])]; + tensor var_5725_squeeze_mask_0 = const()[name = string("op_5725_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = var_5725_end_0, end_mask = var_5725_end_mask_0, squeeze_mask = var_5725_squeeze_mask_0, x = K_sliding_in)[name = string("op_5725_cast_fp16")]; + tensor K_sliding_slot_17_axes_0 = const()[name = string("K_sliding_slot_17_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_17_cast_fp16 = expand_dims(axes = K_sliding_slot_17_axes_0, x = var_5725_cast_fp16)[name = string("K_sliding_slot_17_cast_fp16")]; + tensor var_5730_begin_0 = const()[name = string("op_5730_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5730_end_0 = const()[name = string("op_5730_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_5730_end_mask_0 = const()[name = string("op_5730_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5730_squeeze_mask_0 = const()[name = string("op_5730_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5730_cast_fp16 = slice_by_index(begin = var_5730_begin_0, end = var_5730_end_0, end_mask = var_5730_end_mask_0, squeeze_mask = var_5730_squeeze_mask_0, x = V_sliding_in)[name = string("op_5730_cast_fp16")]; + tensor V_sliding_slot_17_axes_0 = const()[name = string("V_sliding_slot_17_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_17_cast_fp16 = expand_dims(axes = V_sliding_slot_17_axes_0, x = var_5730_cast_fp16)[name = string("V_sliding_slot_17_cast_fp16")]; + int32 var_5737 = const()[name = string("op_5737"), val = int32(-1)]; + fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5739_cast_fp16 = mul(x = x_179_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_5739_cast_fp16")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267_cast_fp16 = 
concat(axis = var_5737, interleave = input_267_interleave_0, values = (x_179_cast_fp16, var_5739_cast_fp16))[name = string("input_267_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_5734_to_fp16 = const()[name = string("op_5734_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_5734_to_fp16, x = input_267_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor var_5744_split_sizes_0 = const()[name = string("op_5744_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5744_axis_0 = const()[name = string("op_5744_axis_0"), val = int32(-1)]; + tensor var_5744_cast_fp16_0, tensor var_5744_cast_fp16_1 = split(axis = var_5744_axis_0, split_sizes = var_5744_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_5744_cast_fp16")]; + tensor layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560649856)))]; + tensor h_55_cast_fp16 = mul(x = var_5744_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")]; + tensor var_5750 = const()[name = string("op_5750"), val = tensor([0, 2, 1])]; + tensor var_5753_axes_0 = const()[name = string("op_5753_axes_0"), val = tensor([2])]; + tensor var_5751_cast_fp16 = transpose(perm = var_5750, x = h_55_cast_fp16)[name = string("transpose_53")]; + tensor var_5753_cast_fp16 = expand_dims(axes = var_5753_axes_0, x = var_5751_cast_fp16)[name = string("op_5753_cast_fp16")]; + string var_5769_pad_type_0 = const()[name = string("op_5769_pad_type_0"), val = string("valid")]; + tensor var_5769_strides_0 = const()[name = string("op_5769_strides_0"), val = tensor([1, 1])]; + tensor var_5769_pad_0 = const()[name = string("op_5769_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5769_dilations_0 = 
const()[name = string("op_5769_dilations_0"), val = tensor([1, 1])]; + int32 var_5769_groups_0 = const()[name = string("op_5769_groups_0"), val = int32(1)]; + tensor var_5769 = conv(dilations = var_5769_dilations_0, groups = var_5769_groups_0, pad = var_5769_pad_0, pad_type = var_5769_pad_type_0, strides = var_5769_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_5753_cast_fp16)[name = string("op_5769")]; + tensor var_5774 = const()[name = string("op_5774"), val = tensor([1, 8, 256, 1])]; + tensor var_5775 = reshape(shape = var_5774, x = var_5769)[name = string("op_5775")]; + tensor var_5780 = const()[name = string("op_5780"), val = tensor([0, 1, 3, 2])]; + tensor var_5790 = const()[name = string("op_5790"), val = tensor([1, 8, 256])]; + tensor var_5781 = transpose(perm = var_5780, x = var_5775)[name = string("transpose_52")]; + tensor x_181 = reshape(shape = var_5790, x = var_5781)[name = string("x_181")]; + int32 var_5796 = const()[name = string("op_5796"), val = int32(-1)]; + fp16 const_107_promoted = const()[name = string("const_107_promoted"), val = fp16(-0x1p+0)]; + tensor var_5798 = mul(x = x_181, y = const_107_promoted)[name = string("op_5798")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271 = concat(axis = var_5796, interleave = input_271_interleave_0, values = (x_181, var_5798))[name = string("input_271")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_5793_to_fp16 = const()[name = string("op_5793_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_5793_to_fp16, x = input_271)[name = string("normed_257_cast_fp16")]; + tensor var_5803_split_sizes_0 = const()[name = string("op_5803_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5803_axis_0 = const()[name = string("op_5803_axis_0"), val = int32(-1)]; + tensor var_5803_0, tensor 
var_5803_1 = split(axis = var_5803_axis_0, split_sizes = var_5803_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_5803")]; + tensor var_5805 = mul(x = var_5803_0, y = layers_9_self_attn_q_norm_weight)[name = string("op_5805")]; + tensor var_5810 = const()[name = string("op_5810"), val = tensor([1, 8, 1, 256])]; + tensor q_75 = reshape(shape = var_5810, x = var_5805)[name = string("q_75")]; + tensor var_5812_cast_fp16 = mul(x = q_75, y = cos_s)[name = string("op_5812_cast_fp16")]; + tensor var_5813_split_sizes_0 = const()[name = string("op_5813_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5813_axis_0 = const()[name = string("op_5813_axis_0"), val = int32(-1)]; + tensor var_5813_0, tensor var_5813_1 = split(axis = var_5813_axis_0, split_sizes = var_5813_split_sizes_0, x = q_75)[name = string("op_5813")]; + fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)]; + tensor var_5815 = mul(x = var_5813_1, y = const_108_promoted)[name = string("op_5815")]; + int32 var_5817 = const()[name = string("op_5817"), val = int32(-1)]; + bool var_5818_interleave_0 = const()[name = string("op_5818_interleave_0"), val = bool(false)]; + tensor var_5818 = concat(axis = var_5817, interleave = var_5818_interleave_0, values = (var_5815, var_5813_0))[name = string("op_5818")]; + tensor var_5819_cast_fp16 = mul(x = var_5818, y = sin_s)[name = string("op_5819_cast_fp16")]; + tensor q_79_cast_fp16 = add(x = var_5812_cast_fp16, y = var_5819_cast_fp16)[name = string("q_79_cast_fp16")]; + string var_5832_pad_type_0 = const()[name = string("op_5832_pad_type_0"), val = string("valid")]; + tensor var_5832_strides_0 = const()[name = string("op_5832_strides_0"), val = tensor([1, 1])]; + tensor var_5832_pad_0 = const()[name = string("op_5832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5832_dilations_0 = const()[name = string("op_5832_dilations_0"), val = tensor([1, 1])]; + int32 var_5832_groups_0 = const()[name = 
string("op_5832_groups_0"), val = int32(1)]; + tensor var_5832 = conv(dilations = var_5832_dilations_0, groups = var_5832_groups_0, pad = var_5832_pad_0, pad_type = var_5832_pad_type_0, strides = var_5832_strides_0, weight = layers_9_self_attn_k_proj_weight_palettized, x = var_5753_cast_fp16)[name = string("op_5832")]; + tensor var_5837 = const()[name = string("op_5837"), val = tensor([1, 2, 256, 1])]; + tensor var_5838 = reshape(shape = var_5837, x = var_5832)[name = string("op_5838")]; + tensor var_5843 = const()[name = string("op_5843"), val = tensor([0, 1, 3, 2])]; + string var_5860_pad_type_0 = const()[name = string("op_5860_pad_type_0"), val = string("valid")]; + tensor var_5860_strides_0 = const()[name = string("op_5860_strides_0"), val = tensor([1, 1])]; + tensor var_5860_pad_0 = const()[name = string("op_5860_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5860_dilations_0 = const()[name = string("op_5860_dilations_0"), val = tensor([1, 1])]; + int32 var_5860_groups_0 = const()[name = string("op_5860_groups_0"), val = int32(1)]; + tensor var_5860 = conv(dilations = var_5860_dilations_0, groups = var_5860_groups_0, pad = var_5860_pad_0, pad_type = var_5860_pad_type_0, strides = var_5860_strides_0, weight = layers_9_self_attn_v_proj_weight_palettized, x = var_5753_cast_fp16)[name = string("op_5860")]; + tensor var_5865 = const()[name = string("op_5865"), val = tensor([1, 2, 256, 1])]; + tensor var_5866 = reshape(shape = var_5865, x = var_5860)[name = string("op_5866")]; + tensor var_5871 = const()[name = string("op_5871"), val = tensor([0, 1, 3, 2])]; + tensor var_5881 = const()[name = string("op_5881"), val = tensor([1, 2, 256])]; + tensor var_5844 = transpose(perm = var_5843, x = var_5838)[name = string("transpose_51")]; + tensor x_183 = reshape(shape = var_5881, x = var_5844)[name = string("x_183")]; + int32 var_5887 = const()[name = string("op_5887"), val = int32(-1)]; + fp16 const_109_promoted = const()[name = string("const_109_promoted"), val = 
fp16(-0x1p+0)]; + tensor var_5889 = mul(x = x_183, y = const_109_promoted)[name = string("op_5889")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273 = concat(axis = var_5887, interleave = input_273_interleave_0, values = (x_183, var_5889))[name = string("input_273")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_5884_to_fp16 = const()[name = string("op_5884_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_5884_to_fp16, x = input_273)[name = string("normed_261_cast_fp16")]; + tensor var_5894_split_sizes_0 = const()[name = string("op_5894_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5894_axis_0 = const()[name = string("op_5894_axis_0"), val = int32(-1)]; + tensor var_5894_0, tensor var_5894_1 = split(axis = var_5894_axis_0, split_sizes = var_5894_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_5894")]; + tensor var_5896 = mul(x = var_5894_0, y = layers_9_self_attn_k_norm_weight)[name = string("op_5896")]; + tensor var_5901 = const()[name = string("op_5901"), val = tensor([1, 2, 1, 256])]; + tensor q_77 = reshape(shape = var_5901, x = var_5896)[name = string("q_77")]; + fp16 var_5903_promoted = const()[name = string("op_5903_promoted"), val = fp16(0x1p+1)]; + tensor var_5872 = transpose(perm = var_5871, x = var_5866)[name = string("transpose_50")]; + tensor var_5904 = pow(x = var_5872, y = var_5903_promoted)[name = string("op_5904")]; + tensor var_5909_axes_0 = const()[name = string("op_5909_axes_0"), val = tensor([-1])]; + bool var_5909_keep_dims_0 = const()[name = string("op_5909_keep_dims_0"), val = bool(true)]; + tensor var_5909 = reduce_mean(axes = var_5909_axes_0, keep_dims = var_5909_keep_dims_0, x = var_5904)[name = string("op_5909")]; + fp16 var_5911_to_fp16 = const()[name = string("op_5911_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
mean_sq_19_cast_fp16 = add(x = var_5909, y = var_5911_to_fp16)[name = string("mean_sq_19_cast_fp16")]; + fp32 var_5913_epsilon_0 = const()[name = string("op_5913_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5913_cast_fp16 = rsqrt(epsilon = var_5913_epsilon_0, x = mean_sq_19_cast_fp16)[name = string("op_5913_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_5872, y = var_5913_cast_fp16)[name = string("input_277_cast_fp16")]; + tensor var_5915_cast_fp16 = mul(x = q_77, y = cos_s)[name = string("op_5915_cast_fp16")]; + tensor var_5916_split_sizes_0 = const()[name = string("op_5916_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5916_axis_0 = const()[name = string("op_5916_axis_0"), val = int32(-1)]; + tensor var_5916_0, tensor var_5916_1 = split(axis = var_5916_axis_0, split_sizes = var_5916_split_sizes_0, x = q_77)[name = string("op_5916")]; + fp16 const_110_promoted = const()[name = string("const_110_promoted"), val = fp16(-0x1p+0)]; + tensor var_5918 = mul(x = var_5916_1, y = const_110_promoted)[name = string("op_5918")]; + int32 var_5920 = const()[name = string("op_5920"), val = int32(-1)]; + bool var_5921_interleave_0 = const()[name = string("op_5921_interleave_0"), val = bool(false)]; + tensor var_5921 = concat(axis = var_5920, interleave = var_5921_interleave_0, values = (var_5918, var_5916_0))[name = string("op_5921")]; + tensor var_5922_cast_fp16 = mul(x = var_5921, y = sin_s)[name = string("op_5922_cast_fp16")]; + tensor input_275_cast_fp16 = add(x = var_5915_cast_fp16, y = var_5922_cast_fp16)[name = string("input_275_cast_fp16")]; + tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; + fp16 const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_17_cast_fp16 = pad(constant_val = const_111_to_fp16, mode = k_padded_17_mode_0, 
pad = k_padded_17_pad_0, x = input_275_cast_fp16)[name = string("k_padded_17_cast_fp16")]; + tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; + fp16 const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_17_cast_fp16 = pad(constant_val = const_112_to_fp16, mode = v_padded_17_mode_0, pad = v_padded_17_pad_0, x = input_277_cast_fp16)[name = string("v_padded_17_cast_fp16")]; + tensor var_5951_begin_0 = const()[name = string("op_5951_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5951_end_0 = const()[name = string("op_5951_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5951_end_mask_0 = const()[name = string("op_5951_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5951_cast_fp16 = slice_by_index(begin = var_5951_begin_0, end = var_5951_end_0, end_mask = var_5951_end_mask_0, x = K_sliding_slot_17_cast_fp16)[name = string("op_5951_cast_fp16")]; + int32 var_5958 = const()[name = string("op_5958"), val = int32(2)]; + bool K_sliding_out_17_interleave_0 = const()[name = string("K_sliding_out_17_interleave_0"), val = bool(false)]; + tensor K_sliding_out_17_cast_fp16 = concat(axis = var_5958, interleave = K_sliding_out_17_interleave_0, values = (var_5951_cast_fp16, k_padded_17_cast_fp16))[name = string("K_sliding_out_17_cast_fp16")]; + tensor var_5974_begin_0 = const()[name = string("op_5974_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5974_end_0 = const()[name = string("op_5974_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5974_end_mask_0 = const()[name = string("op_5974_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5974_cast_fp16 = slice_by_index(begin = var_5974_begin_0, end = var_5974_end_0, end_mask = var_5974_end_mask_0, x = V_sliding_slot_17_cast_fp16)[name = 
string("op_5974_cast_fp16")]; + int32 var_5981 = const()[name = string("op_5981"), val = int32(2)]; + bool V_sliding_out_17_interleave_0 = const()[name = string("V_sliding_out_17_interleave_0"), val = bool(false)]; + tensor V_sliding_out_17_cast_fp16 = concat(axis = var_5981, interleave = V_sliding_out_17_interleave_0, values = (var_5974_cast_fp16, v_padded_17_cast_fp16))[name = string("V_sliding_out_17_cast_fp16")]; + tensor K_for_attn_19_begin_0 = const()[name = string("K_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_19_end_0 = const()[name = string("K_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_19_end_mask_0 = const()[name = string("K_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_19_cast_fp16 = slice_by_index(begin = K_for_attn_19_begin_0, end = K_for_attn_19_end_0, end_mask = K_for_attn_19_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("K_for_attn_19_cast_fp16")]; + tensor V_for_attn_19_begin_0 = const()[name = string("V_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_19_end_0 = const()[name = string("V_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_19_end_mask_0 = const()[name = string("V_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_19_cast_fp16 = slice_by_index(begin = V_for_attn_19_begin_0, end = V_for_attn_19_end_0, end_mask = V_for_attn_19_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("V_for_attn_19_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_18_reps_0 = const()[name = string("tile_18_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = K_for_attn_19_cast_fp16)[name = string("transpose_49")]; + tensor tile_18_cast_fp16 = tile(reps = tile_18_reps_0, x = transpose_36_cast_fp16)[name = 
string("tile_18_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_36, x = tile_18_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_36_cast_fp16)[name = string("transpose_48")]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_37, x = transpose_37_cast_fp16)[name = string("reshape_37_cast_fp16")]; + tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_19_reps_0 = const()[name = string("tile_19_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = V_for_attn_19_cast_fp16)[name = string("transpose_47")]; + tensor tile_19_cast_fp16 = tile(reps = tile_19_reps_0, x = transpose_38_cast_fp16)[name = string("tile_19_cast_fp16")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_38, x = tile_19_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_39_cast_fp16 = transpose(perm = transpose_39_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_46")]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_39, x = transpose_39_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor V_expanded_19_perm_0 = const()[name = 
string("V_expanded_19_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)]; + bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)]; + tensor transpose_57_cast_fp16 = transpose(perm = transpose_57_perm_0, x = reshape_37_cast_fp16)[name = string("transpose_45")]; + tensor attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_79_cast_fp16, y = transpose_57_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_sliding)[name = string("x_187_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_187_cast_fp16)[name = string("reduce_max_9")]; + tensor var_6022 = sub(x = x_187_cast_fp16, y = reduce_max_9)[name = string("op_6022")]; + tensor var_6028 = exp(x = var_6022)[name = string("op_6028")]; + tensor var_6038_axes_0 = const()[name = string("op_6038_axes_0"), val = tensor([-1])]; + bool var_6038_keep_dims_0 = const()[name = string("op_6038_keep_dims_0"), val = bool(true)]; + tensor var_6038 = reduce_sum(axes = var_6038_axes_0, keep_dims = var_6038_keep_dims_0, x = var_6028)[name = string("op_6038")]; + tensor var_6044_cast_fp16 = real_div(x = var_6028, y = var_6038)[name = string("op_6044_cast_fp16")]; + bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; + bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; + tensor V_expanded_19_cast_fp16 = transpose(perm = 
V_expanded_19_perm_0, x = reshape_39_cast_fp16)[name = string("transpose_44")]; + tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_6044_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_6055 = const()[name = string("op_6055"), val = tensor([0, 2, 1, 3])]; + tensor var_6062 = const()[name = string("op_6062"), val = tensor([1, 1, -1])]; + tensor var_6056_cast_fp16 = transpose(perm = var_6055, x = attn_output_55_cast_fp16)[name = string("transpose_43")]; + tensor attn_output_57_cast_fp16 = reshape(shape = var_6062, x = var_6056_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_6067 = const()[name = string("op_6067"), val = tensor([0, 2, 1])]; + string var_6083_pad_type_0 = const()[name = string("op_6083_pad_type_0"), val = string("valid")]; + int32 var_6083_groups_0 = const()[name = string("op_6083_groups_0"), val = int32(1)]; + tensor var_6083_strides_0 = const()[name = string("op_6083_strides_0"), val = tensor([1])]; + tensor var_6083_pad_0 = const()[name = string("op_6083_pad_0"), val = tensor([0, 0])]; + tensor var_6083_dilations_0 = const()[name = string("op_6083_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560655040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563276544))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6068_cast_fp16 = transpose(perm = var_6067, x = attn_output_57_cast_fp16)[name = string("transpose_42")]; + tensor var_6083_cast_fp16 = conv(dilations = var_6083_dilations_0, groups = var_6083_groups_0, pad = var_6083_pad_0, pad_type = var_6083_pad_type_0, strides = var_6083_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = 
var_6068_cast_fp16)[name = string("op_6083_cast_fp16")]; + tensor var_6087 = const()[name = string("op_6087"), val = tensor([0, 2, 1])]; + int32 var_6093 = const()[name = string("op_6093"), val = int32(-1)]; + fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_191_cast_fp16 = transpose(perm = var_6087, x = var_6083_cast_fp16)[name = string("transpose_41")]; + tensor var_6095_cast_fp16 = mul(x = x_191_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_6095_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_6093, interleave = input_281_interleave_0, values = (x_191_cast_fp16, var_6095_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_6090_to_fp16 = const()[name = string("op_6090_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_6090_to_fp16, x = input_281_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor var_6100_split_sizes_0 = const()[name = string("op_6100_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6100_axis_0 = const()[name = string("op_6100_axis_0"), val = int32(-1)]; + tensor var_6100_cast_fp16_0, tensor var_6100_cast_fp16_1 = split(axis = var_6100_axis_0, split_sizes = var_6100_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_6100_cast_fp16")]; + tensor layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563279168)))]; + tensor attn_output_59_cast_fp16 = mul(x = var_6100_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_59_cast_fp16")]; + 
tensor x_193_cast_fp16 = add(x = x_179_cast_fp16, y = attn_output_59_cast_fp16)[name = string("x_193_cast_fp16")]; + int32 var_6109 = const()[name = string("op_6109"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6111_cast_fp16 = mul(x = x_193_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_6111_cast_fp16")]; + bool input_283_interleave_0 = const()[name = string("input_283_interleave_0"), val = bool(false)]; + tensor input_283_cast_fp16 = concat(axis = var_6109, interleave = input_283_interleave_0, values = (x_193_cast_fp16, var_6111_cast_fp16))[name = string("input_283_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_6106_to_fp16 = const()[name = string("op_6106_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_6106_to_fp16, x = input_283_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor var_6116_split_sizes_0 = const()[name = string("op_6116_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6116_axis_0 = const()[name = string("op_6116_axis_0"), val = int32(-1)]; + tensor var_6116_cast_fp16_0, tensor var_6116_cast_fp16_1 = split(axis = var_6116_axis_0, split_sizes = var_6116_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_6116_cast_fp16")]; + tensor layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563284352)))]; + tensor h_57_cast_fp16 = mul(x = var_6116_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")]; + tensor var_6127 = const()[name = string("op_6127"), val = tensor([0, 2, 1])]; + tensor input_285_axes_0 = const()[name = string("input_285_axes_0"), 
val = tensor([2])]; + tensor var_6128 = transpose(perm = var_6127, x = h_57_cast_fp16)[name = string("transpose_40")]; + tensor input_285 = expand_dims(axes = input_285_axes_0, x = var_6128)[name = string("input_285")]; + string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; + tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; + tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; + int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; + tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_285)[name = string("gate_37")]; + string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; + tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; + tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; + int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; + tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_285)[name = string("up_19")]; + string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; + tensor input_287 = mul(x = gate_39, y = up_19)[name = string("input_287")]; + string mlp_out_19_pad_type_0 = const()[name = 
string("mlp_out_19_pad_type_0"), val = string("valid")]; + tensor mlp_out_19_strides_0 = const()[name = string("mlp_out_19_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_19_pad_0 = const()[name = string("mlp_out_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_19_dilations_0 = const()[name = string("mlp_out_19_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_19_groups_0 = const()[name = string("mlp_out_19_groups_0"), val = int32(1)]; + tensor mlp_out_19 = conv(dilations = mlp_out_19_dilations_0, groups = mlp_out_19_groups_0, pad = mlp_out_19_pad_0, pad_type = mlp_out_19_pad_type_0, strides = mlp_out_19_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_287)[name = string("mlp_out_19")]; + tensor var_6168_axes_0 = const()[name = string("op_6168_axes_0"), val = tensor([2])]; + tensor var_6168 = squeeze(axes = var_6168_axes_0, x = mlp_out_19)[name = string("op_6168")]; + tensor var_6172 = const()[name = string("op_6172"), val = tensor([0, 2, 1])]; + int32 var_6178 = const()[name = string("op_6178"), val = int32(-1)]; + fp16 const_115_promoted = const()[name = string("const_115_promoted"), val = fp16(-0x1p+0)]; + tensor x_195 = transpose(perm = var_6172, x = var_6168)[name = string("transpose_39")]; + tensor var_6180 = mul(x = x_195, y = const_115_promoted)[name = string("op_6180")]; + bool input_289_interleave_0 = const()[name = string("input_289_interleave_0"), val = bool(false)]; + tensor input_289 = concat(axis = var_6178, interleave = input_289_interleave_0, values = (x_195, var_6180))[name = string("input_289")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_6175_to_fp16 = const()[name = string("op_6175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_6175_to_fp16, x = input_289)[name = string("normed_273_cast_fp16")]; + tensor var_6185_split_sizes_0 = const()[name = 
string("op_6185_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6185_axis_0 = const()[name = string("op_6185_axis_0"), val = int32(-1)]; + tensor var_6185_0, tensor var_6185_1 = split(axis = var_6185_axis_0, split_sizes = var_6185_split_sizes_0, x = normed_273_cast_fp16)[name = string("op_6185")]; + tensor hidden_states_93 = mul(x = var_6185_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")]; + tensor hidden_states_95_cast_fp16 = add(x = x_193_cast_fp16, y = hidden_states_93)[name = string("hidden_states_95_cast_fp16")]; + tensor per_layer_slice_19_begin_0 = const()[name = string("per_layer_slice_19_begin_0"), val = tensor([0, 0, 5376])]; + tensor per_layer_slice_19_end_0 = const()[name = string("per_layer_slice_19_end_0"), val = tensor([1, 1, 5632])]; + tensor per_layer_slice_19_end_mask_0 = const()[name = string("per_layer_slice_19_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_19_cast_fp16 = slice_by_index(begin = per_layer_slice_19_begin_0, end = per_layer_slice_19_end_0, end_mask = per_layer_slice_19_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_19_cast_fp16")]; + tensor var_6213 = const()[name = string("op_6213"), val = tensor([0, 2, 1])]; + tensor input_291_axes_0 = const()[name = string("input_291_axes_0"), val = tensor([2])]; + tensor var_6214 = transpose(perm = var_6213, x = hidden_states_95_cast_fp16)[name = string("transpose_38")]; + tensor input_291 = expand_dims(axes = input_291_axes_0, x = var_6214)[name = string("input_291")]; + string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")]; + tensor gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor([1, 1])]; + tensor gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor([1, 1])]; + int32 gated_55_groups_0 = const()[name 
= string("gated_55_groups_0"), val = int32(1)]; + tensor gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_291)[name = string("gated_55")]; + string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")]; + tensor var_6233 = const()[name = string("op_6233"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_19_axes_0 = const()[name = string("per_layer_slice_conv_19_axes_0"), val = tensor([2])]; + tensor var_6234_cast_fp16 = transpose(perm = var_6233, x = per_layer_slice_19_cast_fp16)[name = string("transpose_37")]; + tensor per_layer_slice_conv_19_cast_fp16 = expand_dims(axes = per_layer_slice_conv_19_axes_0, x = var_6234_cast_fp16)[name = string("per_layer_slice_conv_19_cast_fp16")]; + tensor input_293_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_19_cast_fp16)[name = string("input_293_cast_fp16")]; + string gated_59_pad_type_0 = const()[name = string("gated_59_pad_type_0"), val = string("valid")]; + tensor gated_59_strides_0 = const()[name = string("gated_59_strides_0"), val = tensor([1, 1])]; + tensor gated_59_pad_0 = const()[name = string("gated_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_59_dilations_0 = const()[name = string("gated_59_dilations_0"), val = tensor([1, 1])]; + int32 gated_59_groups_0 = const()[name = string("gated_59_groups_0"), val = int32(1)]; + tensor layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563289536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563617280))))[name = 
string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_59_cast_fp16 = conv(dilations = gated_59_dilations_0, groups = gated_59_groups_0, pad = gated_59_pad_0, pad_type = gated_59_pad_type_0, strides = gated_59_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_293_cast_fp16)[name = string("gated_59_cast_fp16")]; + tensor var_6250_axes_0 = const()[name = string("op_6250_axes_0"), val = tensor([2])]; + tensor var_6250_cast_fp16 = squeeze(axes = var_6250_axes_0, x = gated_59_cast_fp16)[name = string("op_6250_cast_fp16")]; + tensor var_6254 = const()[name = string("op_6254"), val = tensor([0, 2, 1])]; + int32 var_6260 = const()[name = string("op_6260"), val = int32(-1)]; + fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_197_cast_fp16 = transpose(perm = var_6254, x = var_6250_cast_fp16)[name = string("transpose_36")]; + tensor var_6262_cast_fp16 = mul(x = x_197_cast_fp16, y = const_116_promoted_to_fp16)[name = string("op_6262_cast_fp16")]; + bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)]; + tensor input_295_cast_fp16 = concat(axis = var_6260, interleave = input_295_interleave_0, values = (x_197_cast_fp16, var_6262_cast_fp16))[name = string("input_295_cast_fp16")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_6257_to_fp16 = const()[name = string("op_6257_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_6257_to_fp16, x = input_295_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor var_6267_split_sizes_0 = const()[name = string("op_6267_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6267_axis_0 = const()[name = string("op_6267_axis_0"), val = int32(-1)]; + tensor var_6267_cast_fp16_0, tensor var_6267_cast_fp16_1 = split(axis 
= var_6267_axis_0, split_sizes = var_6267_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_6267_cast_fp16")]; + tensor layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563619904)))]; + tensor hidden_states_99_cast_fp16 = mul(x = var_6267_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = tensor([0x1.d8p-2])]; + tensor x_199_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_117_promoted_to_fp16)[name = string("x_199_cast_fp16")]; + tensor var_6279_axes_0 = const()[name = string("op_6279_axes_0"), val = tensor([0])]; + tensor var_6279_cast_fp16 = squeeze(axes = var_6279_axes_0, x = K_sliding_out_17_cast_fp16)[name = string("op_6279_cast_fp16")]; + tensor var_6281_axes_0 = const()[name = string("op_6281_axes_0"), val = tensor([0])]; + tensor var_6281_cast_fp16 = squeeze(axes = var_6281_axes_0, x = V_sliding_out_17_cast_fp16)[name = string("op_6281_cast_fp16")]; + tensor var_6284_begin_0 = const()[name = string("op_6284_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6284_end_0 = const()[name = string("op_6284_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6284_end_mask_0 = const()[name = string("op_6284_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6284_squeeze_mask_0 = const()[name = string("op_6284_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6284_cast_fp16 = slice_by_index(begin = var_6284_begin_0, end = var_6284_end_0, end_mask = var_6284_end_mask_0, squeeze_mask = 
var_6284_squeeze_mask_0, x = K_sliding_in)[name = string("op_6284_cast_fp16")]; + tensor K_sliding_slot_axes_0 = const()[name = string("K_sliding_slot_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_cast_fp16 = expand_dims(axes = K_sliding_slot_axes_0, x = var_6284_cast_fp16)[name = string("K_sliding_slot_cast_fp16")]; + tensor var_6289_begin_0 = const()[name = string("op_6289_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6289_end_0 = const()[name = string("op_6289_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6289_end_mask_0 = const()[name = string("op_6289_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6289_squeeze_mask_0 = const()[name = string("op_6289_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6289_cast_fp16 = slice_by_index(begin = var_6289_begin_0, end = var_6289_end_0, end_mask = var_6289_end_mask_0, squeeze_mask = var_6289_squeeze_mask_0, x = V_sliding_in)[name = string("op_6289_cast_fp16")]; + tensor V_sliding_slot_axes_0 = const()[name = string("V_sliding_slot_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_cast_fp16 = expand_dims(axes = V_sliding_slot_axes_0, x = var_6289_cast_fp16)[name = string("V_sliding_slot_cast_fp16")]; + int32 var_6296 = const()[name = string("op_6296"), val = int32(-1)]; + fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6298_cast_fp16 = mul(x = x_199_cast_fp16, y = const_118_promoted_to_fp16)[name = string("op_6298_cast_fp16")]; + bool input_297_interleave_0 = const()[name = string("input_297_interleave_0"), val = bool(false)]; + tensor input_297_cast_fp16 = concat(axis = var_6296, interleave = input_297_interleave_0, values = (x_199_cast_fp16, var_6298_cast_fp16))[name = string("input_297_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_6293_to_fp16 = const()[name = string("op_6293_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_6293_to_fp16, x = input_297_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor var_6303_split_sizes_0 = const()[name = string("op_6303_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6303_axis_0 = const()[name = string("op_6303_axis_0"), val = int32(-1)]; + tensor var_6303_cast_fp16_0, tensor var_6303_cast_fp16_1 = split(axis = var_6303_axis_0, split_sizes = var_6303_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_6303_cast_fp16")]; + tensor layers_10_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563625088)))]; + tensor h_61_cast_fp16 = mul(x = var_6303_cast_fp16_0, y = layers_10_input_layernorm_weight_promoted_to_fp16)[name = string("h_61_cast_fp16")]; + tensor var_6309 = const()[name = string("op_6309"), val = tensor([0, 2, 1])]; + tensor var_6312_axes_0 = const()[name = string("op_6312_axes_0"), val = tensor([2])]; + tensor var_6310_cast_fp16 = transpose(perm = var_6309, x = h_61_cast_fp16)[name = string("transpose_35")]; + tensor var_6312_cast_fp16 = expand_dims(axes = var_6312_axes_0, x = var_6310_cast_fp16)[name = string("op_6312_cast_fp16")]; + string var_6328_pad_type_0 = const()[name = string("op_6328_pad_type_0"), val = string("valid")]; + tensor var_6328_strides_0 = const()[name = string("op_6328_strides_0"), val = tensor([1, 1])]; + tensor var_6328_pad_0 = const()[name = string("op_6328_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6328_dilations_0 = const()[name = string("op_6328_dilations_0"), val = tensor([1, 1])]; + int32 var_6328_groups_0 = const()[name = string("op_6328_groups_0"), val = int32(1)]; + tensor var_6328 = conv(dilations = var_6328_dilations_0, groups = var_6328_groups_0, pad = var_6328_pad_0, pad_type = var_6328_pad_type_0, strides = 
var_6328_strides_0, weight = layers_10_self_attn_q_proj_weight_palettized, x = var_6312_cast_fp16)[name = string("op_6328")]; + tensor var_6333 = const()[name = string("op_6333"), val = tensor([1, 8, 256, 1])]; + tensor var_6334 = reshape(shape = var_6333, x = var_6328)[name = string("op_6334")]; + tensor var_6339 = const()[name = string("op_6339"), val = tensor([0, 1, 3, 2])]; + tensor var_6349 = const()[name = string("op_6349"), val = tensor([1, 8, 256])]; + tensor var_6340 = transpose(perm = var_6339, x = var_6334)[name = string("transpose_34")]; + tensor x_201 = reshape(shape = var_6349, x = var_6340)[name = string("x_201")]; + int32 var_6355 = const()[name = string("op_6355"), val = int32(-1)]; + fp16 const_119_promoted = const()[name = string("const_119_promoted"), val = fp16(-0x1p+0)]; + tensor var_6357 = mul(x = x_201, y = const_119_promoted)[name = string("op_6357")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301 = concat(axis = var_6355, interleave = input_301_interleave_0, values = (x_201, var_6357))[name = string("input_301")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_6352_to_fp16 = const()[name = string("op_6352_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_6352_to_fp16, x = input_301)[name = string("normed_285_cast_fp16")]; + tensor var_6362_split_sizes_0 = const()[name = string("op_6362_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6362_axis_0 = const()[name = string("op_6362_axis_0"), val = int32(-1)]; + tensor var_6362_0, tensor var_6362_1 = split(axis = var_6362_axis_0, split_sizes = var_6362_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_6362")]; + tensor var_6364 = mul(x = var_6362_0, y = layers_10_self_attn_q_norm_weight)[name = string("op_6364")]; + tensor var_6369 = const()[name = string("op_6369"), val = 
tensor([1, 8, 1, 256])]; + tensor q_83 = reshape(shape = var_6369, x = var_6364)[name = string("q_83")]; + tensor var_6371_cast_fp16 = mul(x = q_83, y = cos_s)[name = string("op_6371_cast_fp16")]; + tensor var_6372_split_sizes_0 = const()[name = string("op_6372_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6372_axis_0 = const()[name = string("op_6372_axis_0"), val = int32(-1)]; + tensor var_6372_0, tensor var_6372_1 = split(axis = var_6372_axis_0, split_sizes = var_6372_split_sizes_0, x = q_83)[name = string("op_6372")]; + fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; + tensor var_6374 = mul(x = var_6372_1, y = const_120_promoted)[name = string("op_6374")]; + int32 var_6376 = const()[name = string("op_6376"), val = int32(-1)]; + bool var_6377_interleave_0 = const()[name = string("op_6377_interleave_0"), val = bool(false)]; + tensor var_6377 = concat(axis = var_6376, interleave = var_6377_interleave_0, values = (var_6374, var_6372_0))[name = string("op_6377")]; + tensor var_6378_cast_fp16 = mul(x = var_6377, y = sin_s)[name = string("op_6378_cast_fp16")]; + tensor q_87_cast_fp16 = add(x = var_6371_cast_fp16, y = var_6378_cast_fp16)[name = string("q_87_cast_fp16")]; + string var_6391_pad_type_0 = const()[name = string("op_6391_pad_type_0"), val = string("valid")]; + tensor var_6391_strides_0 = const()[name = string("op_6391_strides_0"), val = tensor([1, 1])]; + tensor var_6391_pad_0 = const()[name = string("op_6391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6391_dilations_0 = const()[name = string("op_6391_dilations_0"), val = tensor([1, 1])]; + int32 var_6391_groups_0 = const()[name = string("op_6391_groups_0"), val = int32(1)]; + tensor var_6391 = conv(dilations = var_6391_dilations_0, groups = var_6391_groups_0, pad = var_6391_pad_0, pad_type = var_6391_pad_type_0, strides = var_6391_strides_0, weight = layers_10_self_attn_k_proj_weight_palettized, x = var_6312_cast_fp16)[name = string("op_6391")]; + 
tensor var_6396 = const()[name = string("op_6396"), val = tensor([1, 2, 256, 1])]; + tensor var_6397 = reshape(shape = var_6396, x = var_6391)[name = string("op_6397")]; + tensor var_6402 = const()[name = string("op_6402"), val = tensor([0, 1, 3, 2])]; + string var_6419_pad_type_0 = const()[name = string("op_6419_pad_type_0"), val = string("valid")]; + tensor var_6419_strides_0 = const()[name = string("op_6419_strides_0"), val = tensor([1, 1])]; + tensor var_6419_pad_0 = const()[name = string("op_6419_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6419_dilations_0 = const()[name = string("op_6419_dilations_0"), val = tensor([1, 1])]; + int32 var_6419_groups_0 = const()[name = string("op_6419_groups_0"), val = int32(1)]; + tensor var_6419 = conv(dilations = var_6419_dilations_0, groups = var_6419_groups_0, pad = var_6419_pad_0, pad_type = var_6419_pad_type_0, strides = var_6419_strides_0, weight = layers_10_self_attn_v_proj_weight_palettized, x = var_6312_cast_fp16)[name = string("op_6419")]; + tensor var_6424 = const()[name = string("op_6424"), val = tensor([1, 2, 256, 1])]; + tensor var_6425 = reshape(shape = var_6424, x = var_6419)[name = string("op_6425")]; + tensor var_6430 = const()[name = string("op_6430"), val = tensor([0, 1, 3, 2])]; + tensor var_6440 = const()[name = string("op_6440"), val = tensor([1, 2, 256])]; + tensor var_6403 = transpose(perm = var_6402, x = var_6397)[name = string("transpose_33")]; + tensor x_203 = reshape(shape = var_6440, x = var_6403)[name = string("x_203")]; + int32 var_6446 = const()[name = string("op_6446"), val = int32(-1)]; + fp16 const_121_promoted = const()[name = string("const_121_promoted"), val = fp16(-0x1p+0)]; + tensor var_6448 = mul(x = x_203, y = const_121_promoted)[name = string("op_6448")]; + bool input_303_interleave_0 = const()[name = string("input_303_interleave_0"), val = bool(false)]; + tensor input_303 = concat(axis = var_6446, interleave = input_303_interleave_0, values = (x_203, var_6448))[name = 
string("input_303")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_6443_to_fp16 = const()[name = string("op_6443_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_6443_to_fp16, x = input_303)[name = string("normed_289_cast_fp16")]; + tensor var_6453_split_sizes_0 = const()[name = string("op_6453_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6453_axis_0 = const()[name = string("op_6453_axis_0"), val = int32(-1)]; + tensor var_6453_0, tensor var_6453_1 = split(axis = var_6453_axis_0, split_sizes = var_6453_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_6453")]; + tensor var_6455 = mul(x = var_6453_0, y = layers_4_self_attn_k_norm_weight)[name = string("op_6455")]; + tensor var_6460 = const()[name = string("op_6460"), val = tensor([1, 2, 1, 256])]; + tensor q_85 = reshape(shape = var_6460, x = var_6455)[name = string("q_85")]; + fp16 var_6462_promoted = const()[name = string("op_6462_promoted"), val = fp16(0x1p+1)]; + tensor var_6431 = transpose(perm = var_6430, x = var_6425)[name = string("transpose_32")]; + tensor var_6463 = pow(x = var_6431, y = var_6462_promoted)[name = string("op_6463")]; + tensor var_6468_axes_0 = const()[name = string("op_6468_axes_0"), val = tensor([-1])]; + bool var_6468_keep_dims_0 = const()[name = string("op_6468_keep_dims_0"), val = bool(true)]; + tensor var_6468 = reduce_mean(axes = var_6468_axes_0, keep_dims = var_6468_keep_dims_0, x = var_6463)[name = string("op_6468")]; + fp16 var_6470_to_fp16 = const()[name = string("op_6470_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_21_cast_fp16 = add(x = var_6468, y = var_6470_to_fp16)[name = string("mean_sq_21_cast_fp16")]; + fp32 var_6472_epsilon_0 = const()[name = string("op_6472_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6472_cast_fp16 = rsqrt(epsilon = var_6472_epsilon_0, x = mean_sq_21_cast_fp16)[name = 
string("op_6472_cast_fp16")]; + tensor input_307_cast_fp16 = mul(x = var_6431, y = var_6472_cast_fp16)[name = string("input_307_cast_fp16")]; + tensor var_6474_cast_fp16 = mul(x = q_85, y = cos_s)[name = string("op_6474_cast_fp16")]; + tensor var_6475_split_sizes_0 = const()[name = string("op_6475_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6475_axis_0 = const()[name = string("op_6475_axis_0"), val = int32(-1)]; + tensor var_6475_0, tensor var_6475_1 = split(axis = var_6475_axis_0, split_sizes = var_6475_split_sizes_0, x = q_85)[name = string("op_6475")]; + fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_6477 = mul(x = var_6475_1, y = const_122_promoted)[name = string("op_6477")]; + int32 var_6479 = const()[name = string("op_6479"), val = int32(-1)]; + bool var_6480_interleave_0 = const()[name = string("op_6480_interleave_0"), val = bool(false)]; + tensor var_6480 = concat(axis = var_6479, interleave = var_6480_interleave_0, values = (var_6477, var_6475_0))[name = string("op_6480")]; + tensor var_6481_cast_fp16 = mul(x = var_6480, y = sin_s)[name = string("op_6481_cast_fp16")]; + tensor input_305_cast_fp16 = add(x = var_6474_cast_fp16, y = var_6481_cast_fp16)[name = string("input_305_cast_fp16")]; + tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), val = string("constant")]; + fp16 const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_cast_fp16 = pad(constant_val = const_123_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_305_cast_fp16)[name = string("k_padded_cast_fp16")]; + tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; + fp16 const_124_to_fp16 = 
const()[name = string("const_124_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_cast_fp16 = pad(constant_val = const_124_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = input_307_cast_fp16)[name = string("v_padded_cast_fp16")]; + tensor var_6510_begin_0 = const()[name = string("op_6510_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6510_end_0 = const()[name = string("op_6510_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6510_end_mask_0 = const()[name = string("op_6510_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6510_cast_fp16 = slice_by_index(begin = var_6510_begin_0, end = var_6510_end_0, end_mask = var_6510_end_mask_0, x = K_sliding_slot_cast_fp16)[name = string("op_6510_cast_fp16")]; + int32 var_6517 = const()[name = string("op_6517"), val = int32(2)]; + bool K_sliding_out_interleave_0 = const()[name = string("K_sliding_out_interleave_0"), val = bool(false)]; + tensor K_sliding_out_cast_fp16 = concat(axis = var_6517, interleave = K_sliding_out_interleave_0, values = (var_6510_cast_fp16, k_padded_cast_fp16))[name = string("K_sliding_out_cast_fp16")]; + tensor var_6533_begin_0 = const()[name = string("op_6533_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6533_end_0 = const()[name = string("op_6533_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6533_end_mask_0 = const()[name = string("op_6533_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6533_cast_fp16 = slice_by_index(begin = var_6533_begin_0, end = var_6533_end_0, end_mask = var_6533_end_mask_0, x = V_sliding_slot_cast_fp16)[name = string("op_6533_cast_fp16")]; + int32 var_6540 = const()[name = string("op_6540"), val = int32(2)]; + bool V_sliding_out_interleave_0 = const()[name = string("V_sliding_out_interleave_0"), val = bool(false)]; + tensor V_sliding_out_cast_fp16 = concat(axis = var_6540, interleave = V_sliding_out_interleave_0, values = (var_6533_cast_fp16, v_padded_cast_fp16))[name = 
string("V_sliding_out_cast_fp16")]; + tensor K_for_attn_21_begin_0 = const()[name = string("K_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_21_end_0 = const()[name = string("K_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_21_end_mask_0 = const()[name = string("K_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor kv13_k = slice_by_index(begin = K_for_attn_21_begin_0, end = K_for_attn_21_end_0, end_mask = K_for_attn_21_end_mask_0, x = K_sliding_out_cast_fp16)[name = string("K_for_attn_21_cast_fp16")]; + tensor V_for_attn_21_begin_0 = const()[name = string("V_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_21_end_0 = const()[name = string("V_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_21_end_mask_0 = const()[name = string("V_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor kv13_v = slice_by_index(begin = V_for_attn_21_begin_0, end = V_for_attn_21_end_0, end_mask = V_for_attn_21_end_mask_0, x = V_sliding_out_cast_fp16)[name = string("V_for_attn_21_cast_fp16")]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_20_reps_0 = const()[name = string("tile_20_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = kv13_k)[name = string("transpose_31")]; + tensor tile_20_cast_fp16 = tile(reps = tile_20_reps_0, x = transpose_40_cast_fp16)[name = string("tile_20_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_40, x = tile_20_cast_fp16)[name = string("reshape_40_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([-1, 1, 512, 256])]; + tensor 
transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_40_cast_fp16)[name = string("transpose_30")]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_41, x = transpose_41_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_21_reps_0 = const()[name = string("tile_21_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_42_cast_fp16 = transpose(perm = transpose_42_perm_0, x = kv13_v)[name = string("transpose_29")]; + tensor tile_21_cast_fp16 = tile(reps = tile_21_reps_0, x = transpose_42_cast_fp16)[name = string("tile_21_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_42, x = tile_21_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_43_cast_fp16 = transpose(perm = transpose_43_perm_0, x = reshape_42_cast_fp16)[name = string("transpose_28")]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_43, x = transpose_43_cast_fp16)[name = string("reshape_43_cast_fp16")]; + tensor V_expanded_21_perm_0 = const()[name = string("V_expanded_21_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor transpose_58_cast_fp16 = transpose(perm = transpose_58_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_27")]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = 
attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = q_87_cast_fp16, y = transpose_58_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask_sliding)[name = string("x_207_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_207_cast_fp16)[name = string("reduce_max_10")]; + tensor var_6591 = sub(x = x_207_cast_fp16, y = reduce_max_10)[name = string("op_6591")]; + tensor var_6597 = exp(x = var_6591)[name = string("op_6597")]; + tensor var_6607_axes_0 = const()[name = string("op_6607_axes_0"), val = tensor([-1])]; + bool var_6607_keep_dims_0 = const()[name = string("op_6607_keep_dims_0"), val = bool(true)]; + tensor var_6607 = reduce_sum(axes = var_6607_axes_0, keep_dims = var_6607_keep_dims_0, x = var_6597)[name = string("op_6607")]; + tensor var_6613_cast_fp16 = real_div(x = var_6597, y = var_6607)[name = string("op_6613_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor V_expanded_21_cast_fp16 = transpose(perm = V_expanded_21_perm_0, x = reshape_43_cast_fp16)[name = string("transpose_26")]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_6613_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_6624 = const()[name = string("op_6624"), val = tensor([0, 2, 1, 3])]; + tensor var_6631 = const()[name = string("op_6631"), val = tensor([1, 1, -1])]; + tensor 
var_6625_cast_fp16 = transpose(perm = var_6624, x = attn_output_61_cast_fp16)[name = string("transpose_25")]; + tensor attn_output_63_cast_fp16 = reshape(shape = var_6631, x = var_6625_cast_fp16)[name = string("attn_output_63_cast_fp16")]; + tensor var_6636 = const()[name = string("op_6636"), val = tensor([0, 2, 1])]; + string var_6652_pad_type_0 = const()[name = string("op_6652_pad_type_0"), val = string("valid")]; + int32 var_6652_groups_0 = const()[name = string("op_6652_groups_0"), val = int32(1)]; + tensor var_6652_strides_0 = const()[name = string("op_6652_strides_0"), val = tensor([1])]; + tensor var_6652_pad_0 = const()[name = string("op_6652_pad_0"), val = tensor([0, 0])]; + tensor var_6652_dilations_0 = const()[name = string("op_6652_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563630272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566251776))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6637_cast_fp16 = transpose(perm = var_6636, x = attn_output_63_cast_fp16)[name = string("transpose_24")]; + tensor var_6652_cast_fp16 = conv(dilations = var_6652_dilations_0, groups = var_6652_groups_0, pad = var_6652_pad_0, pad_type = var_6652_pad_type_0, strides = var_6652_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6637_cast_fp16)[name = string("op_6652_cast_fp16")]; + tensor var_6656 = const()[name = string("op_6656"), val = tensor([0, 2, 1])]; + int32 var_6662 = const()[name = string("op_6662"), val = int32(-1)]; + fp16 const_125_promoted_to_fp16 = const()[name = string("const_125_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_211_cast_fp16 = transpose(perm = var_6656, x = var_6652_cast_fp16)[name = string("transpose_23")]; + tensor var_6664_cast_fp16 = mul(x = x_211_cast_fp16, y = 
const_125_promoted_to_fp16)[name = string("op_6664_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_6662, interleave = input_311_interleave_0, values = (x_211_cast_fp16, var_6664_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_6659_to_fp16 = const()[name = string("op_6659_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_6659_to_fp16, x = input_311_cast_fp16)[name = string("normed_293_cast_fp16")]; + tensor var_6669_split_sizes_0 = const()[name = string("op_6669_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6669_axis_0 = const()[name = string("op_6669_axis_0"), val = int32(-1)]; + tensor var_6669_cast_fp16_0, tensor var_6669_cast_fp16_1 = split(axis = var_6669_axis_0, split_sizes = var_6669_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_6669_cast_fp16")]; + tensor layers_10_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566254400)))]; + tensor attn_output_65_cast_fp16 = mul(x = var_6669_cast_fp16_0, y = layers_10_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_199_cast_fp16, y = attn_output_65_cast_fp16)[name = string("x_213_cast_fp16")]; + int32 var_6678 = const()[name = string("op_6678"), val = int32(-1)]; + fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6680_cast_fp16 = mul(x = x_213_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_6680_cast_fp16")]; + bool input_313_interleave_0 = const()[name = 
string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_6678, interleave = input_313_interleave_0, values = (x_213_cast_fp16, var_6680_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_6675_to_fp16, x = input_313_cast_fp16)[name = string("normed_297_cast_fp16")]; + tensor var_6685_split_sizes_0 = const()[name = string("op_6685_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6685_axis_0 = const()[name = string("op_6685_axis_0"), val = int32(-1)]; + tensor var_6685_cast_fp16_0, tensor var_6685_cast_fp16_1 = split(axis = var_6685_axis_0, split_sizes = var_6685_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_6685_cast_fp16")]; + tensor layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566259584)))]; + tensor h_63_cast_fp16 = mul(x = var_6685_cast_fp16_0, y = layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_63_cast_fp16")]; + tensor var_6696 = const()[name = string("op_6696"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_6697 = transpose(perm = var_6696, x = h_63_cast_fp16)[name = string("transpose_22")]; + tensor input_315 = expand_dims(axes = input_315_axes_0, x = var_6697)[name = string("input_315")]; + string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; + tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; + tensor gate_41_pad_0 = const()[name = 
string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; + int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; + tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight_palettized, x = input_315)[name = string("gate_41")]; + string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; + tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; + tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; + int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; + tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight_palettized, x = input_315)[name = string("up_21")]; + string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; + tensor input_317 = mul(x = gate_43, y = up_21)[name = string("input_317")]; + string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; + tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; + tensor mlp_out_21 = 
conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_10_mlp_down_proj_weight_palettized, x = input_317)[name = string("mlp_out_21")]; + tensor var_6737_axes_0 = const()[name = string("op_6737_axes_0"), val = tensor([2])]; + tensor var_6737 = squeeze(axes = var_6737_axes_0, x = mlp_out_21)[name = string("op_6737")]; + tensor var_6741 = const()[name = string("op_6741"), val = tensor([0, 2, 1])]; + int32 var_6747 = const()[name = string("op_6747"), val = int32(-1)]; + fp16 const_127_promoted = const()[name = string("const_127_promoted"), val = fp16(-0x1p+0)]; + tensor x_215 = transpose(perm = var_6741, x = var_6737)[name = string("transpose_21")]; + tensor var_6749 = mul(x = x_215, y = const_127_promoted)[name = string("op_6749")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319 = concat(axis = var_6747, interleave = input_319_interleave_0, values = (x_215, var_6749))[name = string("input_319")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_6744_to_fp16 = const()[name = string("op_6744_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_6744_to_fp16, x = input_319)[name = string("normed_301_cast_fp16")]; + tensor var_6754_split_sizes_0 = const()[name = string("op_6754_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6754_axis_0 = const()[name = string("op_6754_axis_0"), val = int32(-1)]; + tensor var_6754_0, tensor var_6754_1 = split(axis = var_6754_axis_0, split_sizes = var_6754_split_sizes_0, x = normed_301_cast_fp16)[name = string("op_6754")]; + tensor hidden_states_103 = mul(x = var_6754_0, y = layers_10_post_feedforward_layernorm_weight)[name = string("hidden_states_103")]; + tensor hidden_states_105_cast_fp16 = add(x = x_213_cast_fp16, y 
= hidden_states_103)[name = string("hidden_states_105_cast_fp16")]; + tensor per_layer_slice_21_begin_0 = const()[name = string("per_layer_slice_21_begin_0"), val = tensor([0, 0, 5632])]; + tensor per_layer_slice_21_end_0 = const()[name = string("per_layer_slice_21_end_0"), val = tensor([1, 1, 5888])]; + tensor per_layer_slice_21_end_mask_0 = const()[name = string("per_layer_slice_21_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_21_cast_fp16 = slice_by_index(begin = per_layer_slice_21_begin_0, end = per_layer_slice_21_end_0, end_mask = per_layer_slice_21_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_21_cast_fp16")]; + tensor var_6782 = const()[name = string("op_6782"), val = tensor([0, 2, 1])]; + tensor input_321_axes_0 = const()[name = string("input_321_axes_0"), val = tensor([2])]; + tensor var_6783 = transpose(perm = var_6782, x = hidden_states_105_cast_fp16)[name = string("transpose_20")]; + tensor input_321 = expand_dims(axes = input_321_axes_0, x = var_6783)[name = string("input_321")]; + string gated_61_pad_type_0 = const()[name = string("gated_61_pad_type_0"), val = string("valid")]; + tensor gated_61_strides_0 = const()[name = string("gated_61_strides_0"), val = tensor([1, 1])]; + tensor gated_61_pad_0 = const()[name = string("gated_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_61_dilations_0 = const()[name = string("gated_61_dilations_0"), val = tensor([1, 1])]; + int32 gated_61_groups_0 = const()[name = string("gated_61_groups_0"), val = int32(1)]; + tensor gated_61 = conv(dilations = gated_61_dilations_0, groups = gated_61_groups_0, pad = gated_61_pad_0, pad_type = gated_61_pad_type_0, strides = gated_61_strides_0, weight = layers_10_per_layer_input_gate_weight_palettized, x = input_321)[name = string("gated_61")]; + string gated_63_mode_0 = const()[name = string("gated_63_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_63 = gelu(mode = gated_63_mode_0, x = gated_61)[name = 
string("gated_63")]; + tensor var_6802 = const()[name = string("op_6802"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_21_axes_0 = const()[name = string("per_layer_slice_conv_21_axes_0"), val = tensor([2])]; + tensor var_6803_cast_fp16 = transpose(perm = var_6802, x = per_layer_slice_21_cast_fp16)[name = string("transpose_19")]; + tensor per_layer_slice_conv_21_cast_fp16 = expand_dims(axes = per_layer_slice_conv_21_axes_0, x = var_6803_cast_fp16)[name = string("per_layer_slice_conv_21_cast_fp16")]; + tensor input_323_cast_fp16 = mul(x = gated_63, y = per_layer_slice_conv_21_cast_fp16)[name = string("input_323_cast_fp16")]; + string gated_65_pad_type_0 = const()[name = string("gated_65_pad_type_0"), val = string("valid")]; + tensor gated_65_strides_0 = const()[name = string("gated_65_strides_0"), val = tensor([1, 1])]; + tensor gated_65_pad_0 = const()[name = string("gated_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_65_dilations_0 = const()[name = string("gated_65_dilations_0"), val = tensor([1, 1])]; + int32 gated_65_groups_0 = const()[name = string("gated_65_groups_0"), val = int32(1)]; + tensor layers_10_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566264768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566592512))))[name = string("layers_10_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_65_cast_fp16 = conv(dilations = gated_65_dilations_0, groups = gated_65_groups_0, pad = gated_65_pad_0, pad_type = gated_65_pad_type_0, strides = gated_65_strides_0, weight = layers_10_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_323_cast_fp16)[name = string("gated_65_cast_fp16")]; + tensor var_6819_axes_0 = const()[name = string("op_6819_axes_0"), val = tensor([2])]; + tensor var_6819_cast_fp16 = squeeze(axes = var_6819_axes_0, x = 
gated_65_cast_fp16)[name = string("op_6819_cast_fp16")]; + tensor var_6823 = const()[name = string("op_6823"), val = tensor([0, 2, 1])]; + int32 var_6829 = const()[name = string("op_6829"), val = int32(-1)]; + fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_217_cast_fp16 = transpose(perm = var_6823, x = var_6819_cast_fp16)[name = string("transpose_18")]; + tensor var_6831_cast_fp16 = mul(x = x_217_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_6831_cast_fp16")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325_cast_fp16 = concat(axis = var_6829, interleave = input_325_interleave_0, values = (x_217_cast_fp16, var_6831_cast_fp16))[name = string("input_325_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_6826_to_fp16 = const()[name = string("op_6826_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_6826_to_fp16, x = input_325_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor var_6836_split_sizes_0 = const()[name = string("op_6836_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6836_axis_0 = const()[name = string("op_6836_axis_0"), val = int32(-1)]; + tensor var_6836_cast_fp16_0, tensor var_6836_cast_fp16_1 = split(axis = var_6836_axis_0, split_sizes = var_6836_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_6836_cast_fp16")]; + tensor layers_10_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566595136)))]; + tensor hidden_states_109_cast_fp16 = mul(x = var_6836_cast_fp16_0, y = layers_10_post_per_layer_input_norm_weight_promoted_to_fp16)[name = 
string("hidden_states_109_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = hidden_states_109_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor const_129_promoted_to_fp16 = const()[name = string("const_129_promoted_to_fp16"), val = tensor([0x1.42p-3])]; + tensor x_219_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_129_promoted_to_fp16)[name = string("x_219_cast_fp16")]; + tensor var_6848_axes_0 = const()[name = string("op_6848_axes_0"), val = tensor([0])]; + tensor var_6848_cast_fp16 = squeeze(axes = var_6848_axes_0, x = K_sliding_out_cast_fp16)[name = string("op_6848_cast_fp16")]; + tensor var_6850_axes_0 = const()[name = string("op_6850_axes_0"), val = tensor([0])]; + tensor var_6850_cast_fp16 = squeeze(axes = var_6850_axes_0, x = V_sliding_out_cast_fp16)[name = string("op_6850_cast_fp16")]; + tensor var_6853_begin_0 = const()[name = string("op_6853_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_6853_end_0 = const()[name = string("op_6853_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_6853_end_mask_0 = const()[name = string("op_6853_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6853_squeeze_mask_0 = const()[name = string("op_6853_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6853_cast_fp16 = slice_by_index(begin = var_6853_begin_0, end = var_6853_end_0, end_mask = var_6853_end_mask_0, squeeze_mask = var_6853_squeeze_mask_0, x = K_full_in)[name = string("op_6853_cast_fp16")]; + tensor K_full_slot_axes_0 = const()[name = string("K_full_slot_axes_0"), val = tensor([0])]; + tensor K_full_slot_cast_fp16 = expand_dims(axes = K_full_slot_axes_0, x = var_6853_cast_fp16)[name = string("K_full_slot_cast_fp16")]; + tensor var_6858_begin_0 = const()[name = string("op_6858_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_6858_end_0 = const()[name = string("op_6858_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_6858_end_mask_0 
= const()[name = string("op_6858_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6858_squeeze_mask_0 = const()[name = string("op_6858_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6858_cast_fp16 = slice_by_index(begin = var_6858_begin_0, end = var_6858_end_0, end_mask = var_6858_end_mask_0, squeeze_mask = var_6858_squeeze_mask_0, x = V_full_in)[name = string("op_6858_cast_fp16")]; + tensor V_full_slot_axes_0 = const()[name = string("V_full_slot_axes_0"), val = tensor([0])]; + tensor V_full_slot_cast_fp16 = expand_dims(axes = V_full_slot_axes_0, x = var_6858_cast_fp16)[name = string("V_full_slot_cast_fp16")]; + int32 var_6865 = const()[name = string("op_6865"), val = int32(-1)]; + fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6867_cast_fp16 = mul(x = x_219_cast_fp16, y = const_130_promoted_to_fp16)[name = string("op_6867_cast_fp16")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327_cast_fp16 = concat(axis = var_6865, interleave = input_327_interleave_0, values = (x_219_cast_fp16, var_6867_cast_fp16))[name = string("input_327_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_6862_to_fp16 = const()[name = string("op_6862_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_6862_to_fp16, x = input_327_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor var_6872_split_sizes_0 = const()[name = string("op_6872_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6872_axis_0 = const()[name = string("op_6872_axis_0"), val = int32(-1)]; + tensor var_6872_cast_fp16_0, tensor var_6872_cast_fp16_1 = split(axis = var_6872_axis_0, split_sizes = var_6872_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_6872_cast_fp16")]; + tensor 
layers_11_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566600320)))]; + tensor h_67_cast_fp16 = mul(x = var_6872_cast_fp16_0, y = layers_11_input_layernorm_weight_promoted_to_fp16)[name = string("h_67_cast_fp16")]; + tensor var_6878 = const()[name = string("op_6878"), val = tensor([0, 2, 1])]; + tensor var_6881_axes_0 = const()[name = string("op_6881_axes_0"), val = tensor([2])]; + tensor var_6879_cast_fp16 = transpose(perm = var_6878, x = h_67_cast_fp16)[name = string("transpose_17")]; + tensor var_6881_cast_fp16 = expand_dims(axes = var_6881_axes_0, x = var_6879_cast_fp16)[name = string("op_6881_cast_fp16")]; + string var_6897_pad_type_0 = const()[name = string("op_6897_pad_type_0"), val = string("valid")]; + tensor var_6897_strides_0 = const()[name = string("op_6897_strides_0"), val = tensor([1, 1])]; + tensor var_6897_pad_0 = const()[name = string("op_6897_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6897_dilations_0 = const()[name = string("op_6897_dilations_0"), val = tensor([1, 1])]; + int32 var_6897_groups_0 = const()[name = string("op_6897_groups_0"), val = int32(1)]; + tensor var_6897 = conv(dilations = var_6897_dilations_0, groups = var_6897_groups_0, pad = var_6897_pad_0, pad_type = var_6897_pad_type_0, strides = var_6897_strides_0, weight = layers_11_self_attn_q_proj_weight_palettized, x = var_6881_cast_fp16)[name = string("op_6897")]; + tensor var_6902 = const()[name = string("op_6902"), val = tensor([1, 8, 512, 1])]; + tensor var_6903 = reshape(shape = var_6902, x = var_6897)[name = string("op_6903")]; + tensor var_6908 = const()[name = string("op_6908"), val = tensor([0, 1, 3, 2])]; + tensor var_6918 = const()[name = string("op_6918"), val = tensor([1, 8, 512])]; + tensor var_6909 = transpose(perm = var_6908, x = var_6903)[name = string("transpose_16")]; + tensor x_221 = reshape(shape = 
var_6918, x = var_6909)[name = string("x_221")]; + int32 var_6924 = const()[name = string("op_6924"), val = int32(-1)]; + fp16 const_131_promoted = const()[name = string("const_131_promoted"), val = fp16(-0x1p+0)]; + tensor var_6926 = mul(x = x_221, y = const_131_promoted)[name = string("op_6926")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331 = concat(axis = var_6924, interleave = input_331_interleave_0, values = (x_221, var_6926))[name = string("input_331")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_6921_to_fp16 = const()[name = string("op_6921_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_6921_to_fp16, x = input_331)[name = string("normed_313_cast_fp16")]; + tensor var_6931_split_sizes_0 = const()[name = string("op_6931_split_sizes_0"), val = tensor([512, 512])]; + int32 var_6931_axis_0 = const()[name = string("op_6931_axis_0"), val = int32(-1)]; + tensor var_6931_0, tensor var_6931_1 = split(axis = var_6931_axis_0, split_sizes = var_6931_split_sizes_0, x = normed_313_cast_fp16)[name = string("op_6931")]; + tensor var_6933 = mul(x = var_6931_0, y = layers_11_self_attn_q_norm_weight)[name = string("op_6933")]; + tensor var_6938 = const()[name = string("op_6938"), val = tensor([1, 8, 1, 512])]; + tensor q_91 = reshape(shape = var_6938, x = var_6933)[name = string("q_91")]; + tensor var_6940_cast_fp16 = mul(x = q_91, y = cos_f)[name = string("op_6940_cast_fp16")]; + tensor var_6941_split_sizes_0 = const()[name = string("op_6941_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6941_axis_0 = const()[name = string("op_6941_axis_0"), val = int32(-1)]; + tensor var_6941_0, tensor var_6941_1 = split(axis = var_6941_axis_0, split_sizes = var_6941_split_sizes_0, x = q_91)[name = string("op_6941")]; + fp16 const_132_promoted = const()[name = 
string("const_132_promoted"), val = fp16(-0x1p+0)]; + tensor var_6943 = mul(x = var_6941_1, y = const_132_promoted)[name = string("op_6943")]; + int32 var_6945 = const()[name = string("op_6945"), val = int32(-1)]; + bool var_6946_interleave_0 = const()[name = string("op_6946_interleave_0"), val = bool(false)]; + tensor var_6946 = concat(axis = var_6945, interleave = var_6946_interleave_0, values = (var_6943, var_6941_0))[name = string("op_6946")]; + tensor var_6947_cast_fp16 = mul(x = var_6946, y = sin_f)[name = string("op_6947_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_6940_cast_fp16, y = var_6947_cast_fp16)[name = string("q_cast_fp16")]; + string var_6960_pad_type_0 = const()[name = string("op_6960_pad_type_0"), val = string("valid")]; + tensor var_6960_strides_0 = const()[name = string("op_6960_strides_0"), val = tensor([1, 1])]; + tensor var_6960_pad_0 = const()[name = string("op_6960_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6960_dilations_0 = const()[name = string("op_6960_dilations_0"), val = tensor([1, 1])]; + int32 var_6960_groups_0 = const()[name = string("op_6960_groups_0"), val = int32(1)]; + tensor var_6960 = conv(dilations = var_6960_dilations_0, groups = var_6960_groups_0, pad = var_6960_pad_0, pad_type = var_6960_pad_type_0, strides = var_6960_strides_0, weight = layers_11_self_attn_k_proj_weight_palettized, x = var_6881_cast_fp16)[name = string("op_6960")]; + tensor var_6965 = const()[name = string("op_6965"), val = tensor([1, 2, 512, 1])]; + tensor var_6966 = reshape(shape = var_6965, x = var_6960)[name = string("op_6966")]; + tensor var_6971 = const()[name = string("op_6971"), val = tensor([0, 1, 3, 2])]; + string var_6988_pad_type_0 = const()[name = string("op_6988_pad_type_0"), val = string("valid")]; + tensor var_6988_strides_0 = const()[name = string("op_6988_strides_0"), val = tensor([1, 1])]; + tensor var_6988_pad_0 = const()[name = string("op_6988_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6988_dilations_0 = 
const()[name = string("op_6988_dilations_0"), val = tensor([1, 1])]; + int32 var_6988_groups_0 = const()[name = string("op_6988_groups_0"), val = int32(1)]; + tensor var_6988 = conv(dilations = var_6988_dilations_0, groups = var_6988_groups_0, pad = var_6988_pad_0, pad_type = var_6988_pad_type_0, strides = var_6988_strides_0, weight = layers_11_self_attn_v_proj_weight_palettized, x = var_6881_cast_fp16)[name = string("op_6988")]; + tensor var_6993 = const()[name = string("op_6993"), val = tensor([1, 2, 512, 1])]; + tensor var_6994 = reshape(shape = var_6993, x = var_6988)[name = string("op_6994")]; + tensor var_6999 = const()[name = string("op_6999"), val = tensor([0, 1, 3, 2])]; + tensor var_7009 = const()[name = string("op_7009"), val = tensor([1, 2, 512])]; + tensor var_6972 = transpose(perm = var_6971, x = var_6966)[name = string("transpose_15")]; + tensor x_223 = reshape(shape = var_7009, x = var_6972)[name = string("x_223")]; + int32 var_7015 = const()[name = string("op_7015"), val = int32(-1)]; + fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; + tensor var_7017 = mul(x = x_223, y = const_133_promoted)[name = string("op_7017")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333 = concat(axis = var_7015, interleave = input_333_interleave_0, values = (x_223, var_7017))[name = string("input_333")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_7012_to_fp16 = const()[name = string("op_7012_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_7012_to_fp16, x = input_333)[name = string("normed_317_cast_fp16")]; + tensor var_7022_split_sizes_0 = const()[name = string("op_7022_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7022_axis_0 = const()[name = string("op_7022_axis_0"), val = int32(-1)]; + tensor var_7022_0, tensor 
var_7022_1 = split(axis = var_7022_axis_0, split_sizes = var_7022_split_sizes_0, x = normed_317_cast_fp16)[name = string("op_7022")]; + tensor var_7024 = mul(x = var_7022_0, y = layers_11_self_attn_k_norm_weight)[name = string("op_7024")]; + tensor var_7029 = const()[name = string("op_7029"), val = tensor([1, 2, 1, 512])]; + tensor q_93 = reshape(shape = var_7029, x = var_7024)[name = string("q_93")]; + fp16 var_7031_promoted = const()[name = string("op_7031_promoted"), val = fp16(0x1p+1)]; + tensor var_7000 = transpose(perm = var_6999, x = var_6994)[name = string("transpose_14")]; + tensor var_7032 = pow(x = var_7000, y = var_7031_promoted)[name = string("op_7032")]; + tensor var_7037_axes_0 = const()[name = string("op_7037_axes_0"), val = tensor([-1])]; + bool var_7037_keep_dims_0 = const()[name = string("op_7037_keep_dims_0"), val = bool(true)]; + tensor var_7037 = reduce_mean(axes = var_7037_axes_0, keep_dims = var_7037_keep_dims_0, x = var_7032)[name = string("op_7037")]; + fp16 var_7039_to_fp16 = const()[name = string("op_7039_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_cast_fp16 = add(x = var_7037, y = var_7039_to_fp16)[name = string("mean_sq_cast_fp16")]; + fp32 var_7041_epsilon_0 = const()[name = string("op_7041_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7041_cast_fp16 = rsqrt(epsilon = var_7041_epsilon_0, x = mean_sq_cast_fp16)[name = string("op_7041_cast_fp16")]; + tensor v_cast_fp16 = mul(x = var_7000, y = var_7041_cast_fp16)[name = string("v_cast_fp16")]; + tensor var_7043_cast_fp16 = mul(x = q_93, y = cos_f)[name = string("op_7043_cast_fp16")]; + tensor var_7044_split_sizes_0 = const()[name = string("op_7044_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7044_axis_0 = const()[name = string("op_7044_axis_0"), val = int32(-1)]; + tensor var_7044_0, tensor var_7044_1 = split(axis = var_7044_axis_0, split_sizes = var_7044_split_sizes_0, x = q_93)[name = string("op_7044")]; + fp16 const_134_promoted = const()[name = 
string("const_134_promoted"), val = fp16(-0x1p+0)]; + tensor var_7046 = mul(x = var_7044_1, y = const_134_promoted)[name = string("op_7046")]; + int32 var_7048 = const()[name = string("op_7048"), val = int32(-1)]; + bool var_7049_interleave_0 = const()[name = string("op_7049_interleave_0"), val = bool(false)]; + tensor var_7049 = concat(axis = var_7048, interleave = var_7049_interleave_0, values = (var_7046, var_7044_0))[name = string("op_7049")]; + tensor var_7050_cast_fp16 = mul(x = var_7049, y = sin_f)[name = string("op_7050_cast_fp16")]; + tensor k_cast_fp16 = add(x = var_7043_cast_fp16, y = var_7050_cast_fp16)[name = string("k_cast_fp16")]; + tensor var_7056_cast_fp16 = mul(x = K_full_slot_cast_fp16, y = var_3733_cast_fp16)[name = string("op_7056_cast_fp16")]; + tensor var_7057_reps_0 = const()[name = string("op_7057_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_7057_cast_fp16 = tile(reps = var_7057_reps_0, x = k_cast_fp16)[name = string("op_7057_cast_fp16")]; + tensor var_7058_cast_fp16 = mul(x = var_7057_cast_fp16, y = update_mask)[name = string("op_7058_cast_fp16")]; + tensor kv14_k = add(x = var_7056_cast_fp16, y = var_7058_cast_fp16)[name = string("K_full_out_cast_fp16")]; + tensor var_7064_cast_fp16 = mul(x = V_full_slot_cast_fp16, y = var_3733_cast_fp16)[name = string("op_7064_cast_fp16")]; + tensor var_7065_reps_0 = const()[name = string("op_7065_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_7065_cast_fp16 = tile(reps = var_7065_reps_0, x = v_cast_fp16)[name = string("op_7065_cast_fp16")]; + tensor var_7066_cast_fp16 = mul(x = var_7065_cast_fp16, y = update_mask)[name = string("op_7066_cast_fp16")]; + tensor kv14_v = add(x = var_7064_cast_fp16, y = var_7066_cast_fp16)[name = string("V_full_out_cast_fp16")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_22_reps_0 = const()[name = string("tile_22_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor 
transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = kv14_k)[name = string("transpose_13")]; + tensor tile_22_cast_fp16 = tile(reps = tile_22_reps_0, x = transpose_44_cast_fp16)[name = string("tile_22_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_44, x = tile_22_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_12")]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_45, x = transpose_45_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_23_reps_0 = const()[name = string("tile_23_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_46_cast_fp16 = transpose(perm = transpose_46_perm_0, x = kv14_v)[name = string("transpose_11")]; + tensor tile_23_cast_fp16 = tile(reps = tile_23_reps_0, x = transpose_46_cast_fp16)[name = string("tile_23_cast_fp16")]; + tensor concat_46 = const()[name = string("concat_46"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_46, x = tile_23_cast_fp16)[name = string("reshape_46_cast_fp16")]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_46_cast_fp16)[name = string("transpose_10")]; + 
tensor reshape_47_cast_fp16 = reshape(shape = concat_47, x = transpose_47_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor V_expanded_perm_0 = const()[name = string("V_expanded_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_45_transpose_x_0 = const()[name = string("attn_weights_45_transpose_x_0"), val = bool(false)]; + bool attn_weights_45_transpose_y_0 = const()[name = string("attn_weights_45_transpose_y_0"), val = bool(false)]; + tensor transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = reshape_45_cast_fp16)[name = string("transpose_9")]; + tensor attn_weights_45_cast_fp16 = matmul(transpose_x = attn_weights_45_transpose_x_0, transpose_y = attn_weights_45_transpose_y_0, x = q_cast_fp16, y = transpose_59_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor x_227_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask_full)[name = string("x_227_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor reduce_max_11 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_227_cast_fp16)[name = string("reduce_max_11")]; + tensor var_7118 = sub(x = x_227_cast_fp16, y = reduce_max_11)[name = string("op_7118")]; + tensor var_7124 = exp(x = var_7118)[name = string("op_7124")]; + tensor var_7134_axes_0 = const()[name = string("op_7134_axes_0"), val = tensor([-1])]; + bool var_7134_keep_dims_0 = const()[name = string("op_7134_keep_dims_0"), val = bool(true)]; + tensor var_7134 = reduce_sum(axes = var_7134_axes_0, keep_dims = var_7134_keep_dims_0, x = var_7124)[name = string("op_7134")]; + tensor var_7140_cast_fp16 = real_div(x = var_7124, y = var_7134)[name = string("op_7140_cast_fp16")]; + bool attn_output_67_transpose_x_0 = const()[name = string("attn_output_67_transpose_x_0"), val = bool(false)]; + bool 
attn_output_67_transpose_y_0 = const()[name = string("attn_output_67_transpose_y_0"), val = bool(false)]; + tensor V_expanded_cast_fp16 = transpose(perm = V_expanded_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_8")]; + tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_0, transpose_y = attn_output_67_transpose_y_0, x = var_7140_cast_fp16, y = V_expanded_cast_fp16)[name = string("attn_output_67_cast_fp16")]; + tensor var_7151 = const()[name = string("op_7151"), val = tensor([0, 2, 1, 3])]; + tensor var_7158 = const()[name = string("op_7158"), val = tensor([1, 1, -1])]; + tensor var_7152_cast_fp16 = transpose(perm = var_7151, x = attn_output_67_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_69_cast_fp16 = reshape(shape = var_7158, x = var_7152_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_7163 = const()[name = string("op_7163"), val = tensor([0, 2, 1])]; + string var_7179_pad_type_0 = const()[name = string("op_7179_pad_type_0"), val = string("valid")]; + int32 var_7179_groups_0 = const()[name = string("op_7179_groups_0"), val = int32(1)]; + tensor var_7179_strides_0 = const()[name = string("op_7179_strides_0"), val = tensor([1])]; + tensor var_7179_pad_0 = const()[name = string("op_7179_pad_0"), val = tensor([0, 0])]; + tensor var_7179_dilations_0 = const()[name = string("op_7179_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566605504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571848448))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7164_cast_fp16 = transpose(perm = var_7163, x = attn_output_69_cast_fp16)[name = string("transpose_6")]; + tensor var_7179_cast_fp16 = conv(dilations = var_7179_dilations_0, groups = var_7179_groups_0, pad = 
var_7179_pad_0, pad_type = var_7179_pad_type_0, strides = var_7179_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7164_cast_fp16)[name = string("op_7179_cast_fp16")]; + tensor var_7183 = const()[name = string("op_7183"), val = tensor([0, 2, 1])]; + int32 var_7189 = const()[name = string("op_7189"), val = int32(-1)]; + fp16 const_135_promoted_to_fp16 = const()[name = string("const_135_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_231_cast_fp16 = transpose(perm = var_7183, x = var_7179_cast_fp16)[name = string("transpose_5")]; + tensor var_7191_cast_fp16 = mul(x = x_231_cast_fp16, y = const_135_promoted_to_fp16)[name = string("op_7191_cast_fp16")]; + bool input_337_interleave_0 = const()[name = string("input_337_interleave_0"), val = bool(false)]; + tensor input_337_cast_fp16 = concat(axis = var_7189, interleave = input_337_interleave_0, values = (x_231_cast_fp16, var_7191_cast_fp16))[name = string("input_337_cast_fp16")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_7186_to_fp16 = const()[name = string("op_7186_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_7186_to_fp16, x = input_337_cast_fp16)[name = string("normed_321_cast_fp16")]; + tensor var_7196_split_sizes_0 = const()[name = string("op_7196_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7196_axis_0 = const()[name = string("op_7196_axis_0"), val = int32(-1)]; + tensor var_7196_cast_fp16_0, tensor var_7196_cast_fp16_1 = split(axis = var_7196_axis_0, split_sizes = var_7196_split_sizes_0, x = normed_321_cast_fp16)[name = string("op_7196_cast_fp16")]; + tensor layers_11_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571851072)))]; + tensor attn_output_cast_fp16 = 
mul(x = var_7196_cast_fp16_0, y = layers_11_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_233_cast_fp16 = add(x = x_219_cast_fp16, y = attn_output_cast_fp16)[name = string("x_233_cast_fp16")]; + int32 var_7205 = const()[name = string("op_7205"), val = int32(-1)]; + fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7207_cast_fp16 = mul(x = x_233_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_7207_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_7205, interleave = input_339_interleave_0, values = (x_233_cast_fp16, var_7207_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_7202_to_fp16 = const()[name = string("op_7202_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_7202_to_fp16, x = input_339_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor var_7212_split_sizes_0 = const()[name = string("op_7212_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7212_axis_0 = const()[name = string("op_7212_axis_0"), val = int32(-1)]; + tensor var_7212_cast_fp16_0, tensor var_7212_cast_fp16_1 = split(axis = var_7212_axis_0, split_sizes = var_7212_split_sizes_0, x = normed_325_cast_fp16)[name = string("op_7212_cast_fp16")]; + tensor layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571856256)))]; + tensor h_69_cast_fp16 = mul(x = var_7212_cast_fp16_0, y = layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_69_cast_fp16")]; + tensor 
var_7223 = const()[name = string("op_7223"), val = tensor([0, 2, 1])]; + tensor input_341_axes_0 = const()[name = string("input_341_axes_0"), val = tensor([2])]; + tensor var_7224 = transpose(perm = var_7223, x = h_69_cast_fp16)[name = string("transpose_4")]; + tensor input_341 = expand_dims(axes = input_341_axes_0, x = var_7224)[name = string("input_341")]; + string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; + tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; + tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; + int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; + tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight_palettized, x = input_341)[name = string("gate_45")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_11_mlp_up_proj_weight_palettized, x = input_341)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_45)[name = string("gate")]; + tensor input_343 = mul(x = gate, y = up)[name = string("input_343")]; + 
string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_11_mlp_down_proj_weight_palettized, x = input_343)[name = string("mlp_out")]; + tensor var_7264_axes_0 = const()[name = string("op_7264_axes_0"), val = tensor([2])]; + tensor var_7264 = squeeze(axes = var_7264_axes_0, x = mlp_out)[name = string("op_7264")]; + tensor var_7268 = const()[name = string("op_7268"), val = tensor([0, 2, 1])]; + int32 var_7274 = const()[name = string("op_7274"), val = int32(-1)]; + fp16 const_137_promoted = const()[name = string("const_137_promoted"), val = fp16(-0x1p+0)]; + tensor x_235 = transpose(perm = var_7268, x = var_7264)[name = string("transpose_3")]; + tensor var_7276 = mul(x = x_235, y = const_137_promoted)[name = string("op_7276")]; + bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_7274, interleave = input_345_interleave_0, values = (x_235, var_7276))[name = string("input_345")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_7271_to_fp16 = const()[name = string("op_7271_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_7271_to_fp16, x = input_345)[name = string("normed_329_cast_fp16")]; + tensor var_7281_split_sizes_0 = const()[name = string("op_7281_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_7281_axis_0 = const()[name = string("op_7281_axis_0"), val = int32(-1)]; + tensor var_7281_0, tensor var_7281_1 = split(axis = var_7281_axis_0, split_sizes = var_7281_split_sizes_0, x = normed_329_cast_fp16)[name = string("op_7281")]; + tensor hidden_states_113 = mul(x = var_7281_0, y = layers_11_post_feedforward_layernorm_weight)[name = string("hidden_states_113")]; + tensor hidden_states_115_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_113)[name = string("hidden_states_115_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 5888])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 6144])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_7309 = const()[name = string("op_7309"), val = tensor([0, 2, 1])]; + tensor input_347_axes_0 = const()[name = string("input_347_axes_0"), val = tensor([2])]; + tensor var_7310 = transpose(perm = var_7309, x = hidden_states_115_cast_fp16)[name = string("transpose_2")]; + tensor input_347 = expand_dims(axes = input_347_axes_0, x = var_7310)[name = string("input_347")]; + string gated_67_pad_type_0 = const()[name = string("gated_67_pad_type_0"), val = string("valid")]; + tensor gated_67_strides_0 = const()[name = string("gated_67_strides_0"), val = tensor([1, 1])]; + tensor gated_67_pad_0 = const()[name = string("gated_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_67_dilations_0 = const()[name = string("gated_67_dilations_0"), val = tensor([1, 1])]; + int32 gated_67_groups_0 = const()[name = string("gated_67_groups_0"), val = int32(1)]; + tensor gated_67 
= conv(dilations = gated_67_dilations_0, groups = gated_67_groups_0, pad = gated_67_pad_0, pad_type = gated_67_pad_type_0, strides = gated_67_strides_0, weight = layers_11_per_layer_input_gate_weight_palettized, x = input_347)[name = string("gated_67")]; + string gated_69_mode_0 = const()[name = string("gated_69_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_69 = gelu(mode = gated_69_mode_0, x = gated_67)[name = string("gated_69")]; + tensor var_7329 = const()[name = string("op_7329"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_7330_cast_fp16 = transpose(perm = var_7329, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_7330_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_349_cast_fp16 = mul(x = gated_69, y = per_layer_slice_conv_cast_fp16)[name = string("input_349_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_11_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571861440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572189184))))[name = string("layers_11_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = 
gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_11_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_349_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_7346_axes_0 = const()[name = string("op_7346_axes_0"), val = tensor([2])]; + tensor var_7346_cast_fp16 = squeeze(axes = var_7346_axes_0, x = gated_cast_fp16)[name = string("op_7346_cast_fp16")]; + tensor var_7350 = const()[name = string("op_7350"), val = tensor([0, 2, 1])]; + int32 var_7356 = const()[name = string("op_7356"), val = int32(-1)]; + fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_7350, x = var_7346_cast_fp16)[name = string("transpose_0")]; + tensor var_7358_cast_fp16 = mul(x = x_cast_fp16, y = const_138_promoted_to_fp16)[name = string("op_7358_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_7356, interleave = input_interleave_0, values = (x_cast_fp16, var_7358_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_7353_to_fp16 = const()[name = string("op_7353_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_7353_to_fp16, x = input_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor var_7363_split_sizes_0 = const()[name = string("op_7363_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7363_axis_0 = const()[name = string("op_7363_axis_0"), val = int32(-1)]; + tensor var_7363_cast_fp16_0, tensor var_7363_cast_fp16_1 = split(axis = var_7363_axis_0, split_sizes = var_7363_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_7363_cast_fp16")]; + tensor layers_11_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_11_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572191808)))]; + tensor hidden_states_119_cast_fp16 = mul(x = var_7363_cast_fp16_0, y = layers_11_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = tensor([0x1.0cp-4])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_139_promoted_to_fp16)[name = string("op_7373_cast_fp16")]; + tensor var_7375_axes_0 = const()[name = string("op_7375_axes_0"), val = tensor([0])]; + tensor var_7375_cast_fp16 = squeeze(axes = var_7375_axes_0, x = kv14_k)[name = string("op_7375_cast_fp16")]; + tensor var_7377_axes_0 = const()[name = string("op_7377_axes_0"), val = tensor([0])]; + tensor var_7377_cast_fp16 = squeeze(axes = var_7377_axes_0, x = kv14_v)[name = string("op_7377_cast_fp16")]; + int32 var_7380_axis_0 = const()[name = string("op_7380_axis_0"), val = int32(0)]; + tensor K_sliding_out = stack(axis = var_7380_axis_0, values = (var_1290_cast_fp16, var_1849_cast_fp16, var_2408_cast_fp16, var_2967_cast_fp16, var_3526_cast_fp16, var_4602_cast_fp16, var_5161_cast_fp16, var_5720_cast_fp16, var_6279_cast_fp16, var_6848_cast_fp16))[name = string("op_7380_cast_fp16")]; + int32 var_7383_axis_0 = const()[name = string("op_7383_axis_0"), val = int32(0)]; + tensor V_sliding_out = stack(axis = var_7383_axis_0, values = (var_1292_cast_fp16, var_1851_cast_fp16, var_2410_cast_fp16, var_2969_cast_fp16, var_3528_cast_fp16, var_4604_cast_fp16, var_5163_cast_fp16, var_5722_cast_fp16, var_6281_cast_fp16, var_6850_cast_fp16))[name = string("op_7383_cast_fp16")]; + int32 var_7386_axis_0 = const()[name = string("op_7386_axis_0"), val = 
int32(0)]; + tensor K_full_out = stack(axis = var_7386_axis_0, values = (var_4043_cast_fp16, var_7375_cast_fp16))[name = string("op_7386_cast_fp16")]; + int32 var_7389_axis_0 = const()[name = string("op_7389_axis_0"), val = int32(0)]; + tensor V_full_out = stack(axis = var_7389_axis_0, values = (var_4045_cast_fp16, var_7377_cast_fp16))[name = string("op_7389_cast_fp16")]; + } -> (hidden_states_out, K_sliding_out, V_sliding_out, K_full_out, V_full_out, kv13_k, kv13_v, kv14_k, kv14_v); + func verify_qK(tensor K_full_in, tensor K_sliding_in, tensor V_full_in, tensor V_sliding_in, tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor per_layer_combined, tensor sin_f, tensor sin_s, tensor update_indicator) { + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3279680))))[name = string("layers_0_self_attn_k_proj_weight_palettized")]; + tensor layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3280256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3935680))))[name = string("layers_0_self_attn_v_proj_weight_palettized")]; + 
tensor layers_0_self_attn_k_norm_weight = const()[name = string("layers_0_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3936256)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3936832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17044096))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17054400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30161664))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30171968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43279232))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43281856)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43287040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43614784))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43615104))), lut = tensor(BLOBFILE(path 
= string("@model_path/weights/weight.bin"), offset = uint64(46236608))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_self_attn_q_norm_weight = const()[name = string("layers_1_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46238720)))]; + tensor layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46239296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46894720))))[name = string("layers_1_self_attn_k_proj_weight_palettized")]; + tensor layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46895296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47550720))))[name = string("layers_1_self_attn_v_proj_weight_palettized")]; + tensor layers_1_self_attn_k_norm_weight = const()[name = string("layers_1_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47551296)))]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47551872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60659136))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60669440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73776704))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86894272))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86896896)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86902080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87229824))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87230144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89851648))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89853760)))]; + tensor layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89854336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90509760))))[name = string("layers_2_self_attn_k_proj_weight_palettized")]; + tensor layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90510336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(91165760))))[name = string("layers_2_self_attn_v_proj_weight_palettized")]; + tensor layers_2_self_attn_k_norm_weight = const()[name = string("layers_2_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91166336)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91166912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104274176))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104284480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117391744))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117402048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130509312))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130511936)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130844864))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(130845184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133466688))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_self_attn_q_norm_weight = const()[name = string("layers_3_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133468800)))]; + tensor layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133469376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134124800))))[name = string("layers_3_self_attn_k_proj_weight_palettized")]; + tensor layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134125376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134780800))))[name = string("layers_3_self_attn_v_proj_weight_palettized")]; + tensor layers_3_self_attn_k_norm_weight = const()[name = string("layers_3_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134781376)))]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134781952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147889216))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147899520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161006784))))[name = 
string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161017088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174124352))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174126976)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174132160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174459904))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174460224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177081728))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_self_attn_q_norm_weight = const()[name = string("layers_4_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177083840)))]; + tensor layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177084416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177739840))))[name = string("layers_4_self_attn_k_proj_weight_palettized")]; + tensor layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(177740416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178395840))))[name = string("layers_4_self_attn_v_proj_weight_palettized")]; + tensor layers_4_self_attn_k_norm_weight = const()[name = string("layers_4_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178396416)))]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178396992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191504256))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191514560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204621824))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204632128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217739392))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217742016)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217747200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218074944))))[name = 
string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218075264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223318208))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_self_attn_q_norm_weight = const()[name = string("layers_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223322368)))]; + tensor layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223323456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224634240))))[name = string("layers_5_self_attn_k_proj_weight_palettized")]; + tensor layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224635328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225946112))))[name = string("layers_5_self_attn_v_proj_weight_palettized")]; + tensor layers_5_self_attn_k_norm_weight = const()[name = string("layers_5_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225947200)))]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225948288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239055552))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(239065856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252173120))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252183424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265290688))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265293312)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265298496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265626240))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265626560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268248064))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268250176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268905600))))[name = string("layers_6_self_attn_k_proj_weight_palettized")]; + tensor layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268906176))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269561600))))[name = string("layers_6_self_attn_v_proj_weight_palettized")]; + tensor layers_6_self_attn_k_norm_weight = const()[name = string("layers_6_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269562176)))]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269562752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282670016))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295787584))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295797888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308905152))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308907776)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308912960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309240704))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor 
layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309241024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311862528))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_self_attn_q_norm_weight = const()[name = string("layers_7_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311864640)))]; + tensor layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311865216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312520640))))[name = string("layers_7_self_attn_k_proj_weight_palettized")]; + tensor layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312521216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313176640))))[name = string("layers_7_self_attn_v_proj_weight_palettized")]; + tensor layers_7_self_attn_k_norm_weight = const()[name = string("layers_7_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313177216)))]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313177792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326285056))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326295360))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(339402624))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339412928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352520192))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352522816)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352528000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352855744))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352856064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355477568))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355479680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356135104))))[name = string("layers_8_self_attn_k_proj_weight_palettized")]; + tensor layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356135680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(356791104))))[name = string("layers_8_self_attn_v_proj_weight_palettized")]; + tensor layers_8_self_attn_k_norm_weight = const()[name = string("layers_8_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356791680)))]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356792256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369899520))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369909824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383017088))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383027392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396134656))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396137280)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396142464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396470208))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + tensor layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396470528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399092032))))[name = string("layers_9_self_attn_q_proj_weight_palettized")]; + tensor layers_9_self_attn_q_norm_weight = const()[name = string("layers_9_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399094144)))]; + tensor layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399094720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399750144))))[name = string("layers_9_self_attn_k_proj_weight_palettized")]; + tensor layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399750720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400406144))))[name = string("layers_9_self_attn_v_proj_weight_palettized")]; + tensor layers_9_self_attn_k_norm_weight = const()[name = string("layers_9_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400406720)))]; + tensor layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400407296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413514560))))[name = string("layers_9_mlp_gate_proj_weight_palettized")]; + tensor layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413524864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426632128))))[name = 
string("layers_9_mlp_up_proj_weight_palettized")]; + tensor layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426642432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439749696))))[name = string("layers_9_mlp_down_proj_weight_palettized")]; + tensor layers_9_post_feedforward_layernorm_weight = const()[name = string("layers_9_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439752320)))]; + tensor layers_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440085248))))[name = string("layers_9_per_layer_input_gate_weight_palettized")]; + tensor layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440085568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442707072))))[name = string("layers_10_self_attn_q_proj_weight_palettized")]; + tensor layers_10_self_attn_q_norm_weight = const()[name = string("layers_10_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442709184)))]; + tensor layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442709760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443365184))))[name = string("layers_10_self_attn_k_proj_weight_palettized")]; + tensor layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(443365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444021184))))[name = string("layers_10_self_attn_v_proj_weight_palettized")]; + tensor layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444021760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457129024))))[name = string("layers_10_mlp_gate_proj_weight_palettized")]; + tensor layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457139328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470246592))))[name = string("layers_10_mlp_up_proj_weight_palettized")]; + tensor layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470256896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483364160))))[name = string("layers_10_mlp_down_proj_weight_palettized")]; + tensor layers_10_post_feedforward_layernorm_weight = const()[name = string("layers_10_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483366784)))]; + tensor layers_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483371968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483699712))))[name = string("layers_10_per_layer_input_gate_weight_palettized")]; + tensor layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(483700032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488942976))))[name = string("layers_11_self_attn_q_proj_weight_palettized")]; + tensor layers_11_self_attn_q_norm_weight = const()[name = string("layers_11_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488947136)))]; + tensor layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488948224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490259008))))[name = string("layers_11_self_attn_k_proj_weight_palettized")]; + tensor layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490260096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491570880))))[name = string("layers_11_self_attn_v_proj_weight_palettized")]; + tensor layers_11_self_attn_k_norm_weight = const()[name = string("layers_11_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491571968)))]; + tensor layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491573056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504680320))))[name = string("layers_11_mlp_gate_proj_weight_palettized")]; + tensor layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504690624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517797888))))[name = string("layers_11_mlp_up_proj_weight_palettized")]; + tensor 
layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517808192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530915456))))[name = string("layers_11_mlp_down_proj_weight_palettized")]; + tensor layers_11_post_feedforward_layernorm_weight = const()[name = string("layers_11_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530918080)))]; + tensor layers_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530923264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531251008))))[name = string("layers_11_per_layer_input_gate_weight_palettized")]; + int32 var_738 = const()[name = string("op_738"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_740_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_740_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_738, interleave = input_1_interleave_0, values = (hidden_states, var_740_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_735_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_745_split_sizes_0 = const()[name = string("op_745_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_745_axis_0 = const()[name = string("op_745_axis_0"), val = 
int32(-1)]; + tensor var_745_cast_fp16_0, tensor var_745_cast_fp16_1 = split(axis = var_745_axis_0, split_sizes = var_745_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_745_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531251328)))]; + tensor h_1_cast_fp16 = mul(x = var_745_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_751 = const()[name = string("op_751"), val = tensor([0, 2, 1])]; + tensor var_754_axes_0 = const()[name = string("op_754_axes_0"), val = tensor([2])]; + tensor var_752_cast_fp16 = transpose(perm = var_751, x = h_1_cast_fp16)[name = string("transpose_239")]; + tensor var_754_cast_fp16 = expand_dims(axes = var_754_axes_0, x = var_752_cast_fp16)[name = string("op_754_cast_fp16")]; + string q_1_pad_type_0 = const()[name = string("q_1_pad_type_0"), val = string("valid")]; + tensor q_1_strides_0 = const()[name = string("q_1_strides_0"), val = tensor([1, 1])]; + tensor q_1_pad_0 = const()[name = string("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_1_dilations_0 = const()[name = string("q_1_dilations_0"), val = tensor([1, 1])]; + int32 q_1_groups_0 = const()[name = string("q_1_groups_0"), val = int32(1)]; + tensor q_1 = conv(dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_754_cast_fp16)[name = string("q_1")]; + tensor var_775 = const()[name = string("op_775"), val = tensor([1, 8, 256, 3])]; + tensor var_776 = reshape(shape = var_775, x = q_1)[name = string("op_776")]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_799 = const()[name = string("op_799"), val = tensor([3, 8, 256])]; + 
tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = var_776)[name = string("transpose_238")]; + tensor x_1 = reshape(shape = var_799, x = transpose_48)[name = string("x_1")]; + int32 var_805 = const()[name = string("op_805"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_807 = mul(x = x_1, y = const_1_promoted)[name = string("op_807")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_805, interleave = input_5_interleave_0, values = (x_1, var_807))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_802_to_fp16 = const()[name = string("op_802_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_802_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_812_split_sizes_0 = const()[name = string("op_812_split_sizes_0"), val = tensor([256, 256])]; + int32 var_812_axis_0 = const()[name = string("op_812_axis_0"), val = int32(-1)]; + tensor var_812_0, tensor var_812_1 = split(axis = var_812_axis_0, split_sizes = var_812_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_812")]; + tensor q_5 = mul(x = var_812_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_5")]; + tensor var_819 = const()[name = string("op_819"), val = tensor([1, 3, 8, 256])]; + tensor var_820 = reshape(shape = var_819, x = q_5)[name = string("op_820")]; + tensor var_825 = const()[name = string("op_825"), val = tensor([0, 2, 1, 3])]; + tensor q_7 = transpose(perm = var_825, x = var_820)[name = string("transpose_237")]; + tensor var_827_cast_fp16 = mul(x = q_7, y = cos_s)[name = string("op_827_cast_fp16")]; + tensor var_828_split_sizes_0 = const()[name = string("op_828_split_sizes_0"), val = tensor([128, 128])]; + int32 var_828_axis_0 = const()[name = 
string("op_828_axis_0"), val = int32(-1)]; + tensor var_828_0, tensor var_828_1 = split(axis = var_828_axis_0, split_sizes = var_828_split_sizes_0, x = q_7)[name = string("op_828")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_830 = mul(x = var_828_1, y = const_2_promoted)[name = string("op_830")]; + int32 var_832 = const()[name = string("op_832"), val = int32(-1)]; + bool var_833_interleave_0 = const()[name = string("op_833_interleave_0"), val = bool(false)]; + tensor var_833 = concat(axis = var_832, interleave = var_833_interleave_0, values = (var_830, var_828_0))[name = string("op_833")]; + tensor var_834_cast_fp16 = mul(x = var_833, y = sin_s)[name = string("op_834_cast_fp16")]; + tensor q_11_cast_fp16 = add(x = var_827_cast_fp16, y = var_834_cast_fp16)[name = string("q_11_cast_fp16")]; + string k_1_pad_type_0 = const()[name = string("k_1_pad_type_0"), val = string("valid")]; + tensor k_1_strides_0 = const()[name = string("k_1_strides_0"), val = tensor([1, 1])]; + tensor k_1_pad_0 = const()[name = string("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_1_dilations_0 = const()[name = string("k_1_dilations_0"), val = tensor([1, 1])]; + int32 k_1_groups_0 = const()[name = string("k_1_groups_0"), val = int32(1)]; + tensor k_1 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = layers_0_self_attn_k_proj_weight_palettized, x = var_754_cast_fp16)[name = string("k_1")]; + tensor var_852 = const()[name = string("op_852"), val = tensor([1, 2, 256, 3])]; + tensor var_853 = reshape(shape = var_852, x = k_1)[name = string("op_853")]; + tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_1_pad_type_0 = const()[name = string("v_1_pad_type_0"), val = string("valid")]; + tensor v_1_strides_0 = const()[name = string("v_1_strides_0"), val = tensor([1, 1])]; + tensor 
v_1_pad_0 = const()[name = string("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_1_dilations_0 = const()[name = string("v_1_dilations_0"), val = tensor([1, 1])]; + int32 v_1_groups_0 = const()[name = string("v_1_groups_0"), val = int32(1)]; + tensor v_1 = conv(dilations = v_1_dilations_0, groups = v_1_groups_0, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = v_1_strides_0, weight = layers_0_self_attn_v_proj_weight_palettized, x = var_754_cast_fp16)[name = string("v_1")]; + tensor var_880 = const()[name = string("op_880"), val = tensor([1, 2, 256, 3])]; + tensor var_881 = reshape(shape = var_880, x = v_1)[name = string("op_881")]; + tensor var_886 = const()[name = string("op_886"), val = tensor([0, 1, 3, 2])]; + tensor var_904 = const()[name = string("op_904"), val = tensor([3, 2, 256])]; + tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = var_853)[name = string("transpose_236")]; + tensor x_3 = reshape(shape = var_904, x = transpose_49)[name = string("x_3")]; + int32 var_910 = const()[name = string("op_910"), val = int32(-1)]; + fp16 const_3_promoted = const()[name = string("const_3_promoted"), val = fp16(-0x1p+0)]; + tensor var_912 = mul(x = x_3, y = const_3_promoted)[name = string("op_912")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_910, interleave = input_7_interleave_0, values = (x_3, var_912))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_907_to_fp16 = const()[name = string("op_907_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_907_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor var_917_split_sizes_0 = const()[name = string("op_917_split_sizes_0"), val = tensor([256, 256])]; + int32 var_917_axis_0 = const()[name = string("op_917_axis_0"), val = int32(-1)]; + tensor var_917_0, 
tensor var_917_1 = split(axis = var_917_axis_0, split_sizes = var_917_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_917")]; + tensor k_5 = mul(x = var_917_0, y = layers_0_self_attn_k_norm_weight)[name = string("k_5")]; + tensor var_924 = const()[name = string("op_924"), val = tensor([1, 3, 2, 256])]; + tensor var_925 = reshape(shape = var_924, x = k_5)[name = string("op_925")]; + tensor var_930 = const()[name = string("op_930"), val = tensor([0, 2, 1, 3])]; + fp16 var_932_promoted = const()[name = string("op_932_promoted"), val = fp16(0x1p+1)]; + tensor var_887 = transpose(perm = var_886, x = var_881)[name = string("transpose_235")]; + tensor var_933 = pow(x = var_887, y = var_932_promoted)[name = string("op_933")]; + tensor var_938_axes_0 = const()[name = string("op_938_axes_0"), val = tensor([-1])]; + bool var_938_keep_dims_0 = const()[name = string("op_938_keep_dims_0"), val = bool(true)]; + tensor var_938 = reduce_mean(axes = var_938_axes_0, keep_dims = var_938_keep_dims_0, x = var_933)[name = string("op_938")]; + fp16 var_940_to_fp16 = const()[name = string("op_940_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_1_cast_fp16 = add(x = var_938, y = var_940_to_fp16)[name = string("mean_sq_1_cast_fp16")]; + fp32 var_942_epsilon_0 = const()[name = string("op_942_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_942_cast_fp16 = rsqrt(epsilon = var_942_epsilon_0, x = mean_sq_1_cast_fp16)[name = string("op_942_cast_fp16")]; + tensor input_11_cast_fp16 = mul(x = var_887, y = var_942_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor q_9 = transpose(perm = var_930, x = var_925)[name = string("transpose_234")]; + tensor var_944_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_944_cast_fp16")]; + tensor var_945_split_sizes_0 = const()[name = string("op_945_split_sizes_0"), val = tensor([128, 128])]; + int32 var_945_axis_0 = const()[name = string("op_945_axis_0"), val = int32(-1)]; + tensor var_945_0, tensor var_945_1 = split(axis = 
var_945_axis_0, split_sizes = var_945_split_sizes_0, x = q_9)[name = string("op_945")]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_947 = mul(x = var_945_1, y = const_4_promoted)[name = string("op_947")]; + int32 var_949 = const()[name = string("op_949"), val = int32(-1)]; + bool var_950_interleave_0 = const()[name = string("op_950_interleave_0"), val = bool(false)]; + tensor var_950 = concat(axis = var_949, interleave = var_950_interleave_0, values = (var_947, var_945_0))[name = string("op_950")]; + tensor var_951_cast_fp16 = mul(x = var_950, y = sin_s)[name = string("op_951_cast_fp16")]; + tensor input_9_cast_fp16 = add(x = var_944_cast_fp16, y = var_951_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; + fp16 const_5_to_fp16 = const()[name = string("const_5_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_1_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_9_cast_fp16)[name = string("k_padded_1_cast_fp16")]; + tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; + fp16 const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_1_cast_fp16 = pad(constant_val = const_6_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = input_11_cast_fp16)[name = string("v_padded_1_cast_fp16")]; + tensor slot_k_1_begin_0 = const()[name = string("slot_k_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_k_1_end_0 = const()[name = string("slot_k_1_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_k_1_end_mask_0 = const()[name = 
string("slot_k_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_1_cast_fp16 = slice_by_index(begin = slot_k_1_begin_0, end = slot_k_1_end_0, end_mask = slot_k_1_end_mask_0, x = K_sliding_in)[name = string("slot_k_1_cast_fp16")]; + tensor slot_v_1_begin_0 = const()[name = string("slot_v_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_v_1_end_0 = const()[name = string("slot_v_1_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_v_1_end_mask_0 = const()[name = string("slot_v_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_1_cast_fp16 = slice_by_index(begin = slot_v_1_begin_0, end = slot_v_1_end_0, end_mask = slot_v_1_end_mask_0, x = V_sliding_in)[name = string("slot_v_1_cast_fp16")]; + tensor var_990_begin_0 = const()[name = string("op_990_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_990_end_0 = const()[name = string("op_990_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_990_end_mask_0 = const()[name = string("op_990_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_990_cast_fp16 = slice_by_index(begin = var_990_begin_0, end = var_990_end_0, end_mask = var_990_end_mask_0, x = slot_k_1_cast_fp16)[name = string("op_990_cast_fp16")]; + int32 var_997 = const()[name = string("op_997"), val = int32(2)]; + bool new_k_1_interleave_0 = const()[name = string("new_k_1_interleave_0"), val = bool(false)]; + tensor new_k_1_cast_fp16 = concat(axis = var_997, interleave = new_k_1_interleave_0, values = (var_990_cast_fp16, k_padded_1_cast_fp16))[name = string("new_k_1_cast_fp16")]; + tensor var_1013_begin_0 = const()[name = string("op_1013_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1013_end_0 = const()[name = string("op_1013_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1013_end_mask_0 = const()[name = string("op_1013_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1013_cast_fp16 = slice_by_index(begin = var_1013_begin_0, end = var_1013_end_0, 
end_mask = var_1013_end_mask_0, x = slot_v_1_cast_fp16)[name = string("op_1013_cast_fp16")]; + int32 var_1020 = const()[name = string("op_1020"), val = int32(2)]; + bool new_v_1_interleave_0 = const()[name = string("new_v_1_interleave_0"), val = bool(false)]; + tensor new_v_1_cast_fp16 = concat(axis = var_1020, interleave = new_v_1_interleave_0, values = (var_1013_cast_fp16, v_padded_1_cast_fp16))[name = string("new_v_1_cast_fp16")]; + tensor var_1031_begin_0 = const()[name = string("op_1031_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1031_end_0 = const()[name = string("op_1031_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1031_end_mask_0 = const()[name = string("op_1031_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1031_cast_fp16 = slice_by_index(begin = var_1031_begin_0, end = var_1031_end_0, end_mask = var_1031_end_mask_0, x = K_sliding_in)[name = string("op_1031_cast_fp16")]; + int32 var_1033 = const()[name = string("op_1033"), val = int32(0)]; + bool K_sliding_out_1_interleave_0 = const()[name = string("K_sliding_out_1_interleave_0"), val = bool(false)]; + tensor K_sliding_out_1_cast_fp16 = concat(axis = var_1033, interleave = K_sliding_out_1_interleave_0, values = (new_k_1_cast_fp16, var_1031_cast_fp16))[name = string("K_sliding_out_1_cast_fp16")]; + tensor var_1044_begin_0 = const()[name = string("op_1044_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1044_end_0 = const()[name = string("op_1044_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1044_end_mask_0 = const()[name = string("op_1044_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1044_cast_fp16 = slice_by_index(begin = var_1044_begin_0, end = var_1044_end_0, end_mask = var_1044_end_mask_0, x = V_sliding_in)[name = string("op_1044_cast_fp16")]; + int32 var_1046 = const()[name = string("op_1046"), val = int32(0)]; + bool V_sliding_out_1_interleave_0 = const()[name = string("V_sliding_out_1_interleave_0"), val = bool(false)]; + 
tensor V_sliding_out_1_cast_fp16 = concat(axis = var_1046, interleave = V_sliding_out_1_interleave_0, values = (new_v_1_cast_fp16, var_1044_cast_fp16))[name = string("V_sliding_out_1_cast_fp16")]; + tensor var_1052_begin_0 = const()[name = string("op_1052_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1052_end_0 = const()[name = string("op_1052_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1052_end_mask_0 = const()[name = string("op_1052_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1052_cast_fp16 = slice_by_index(begin = var_1052_begin_0, end = var_1052_end_0, end_mask = var_1052_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1052_cast_fp16")]; + tensor K_for_attn_1_begin_0 = const()[name = string("K_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_1_end_0 = const()[name = string("K_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_1_end_mask_0 = const()[name = string("K_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_1_cast_fp16 = slice_by_index(begin = K_for_attn_1_begin_0, end = K_for_attn_1_end_0, end_mask = K_for_attn_1_end_mask_0, x = var_1052_cast_fp16)[name = string("K_for_attn_1_cast_fp16")]; + tensor var_1062_begin_0 = const()[name = string("op_1062_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1062_end_0 = const()[name = string("op_1062_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1062_end_mask_0 = const()[name = string("op_1062_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1062_cast_fp16 = slice_by_index(begin = var_1062_begin_0, end = var_1062_end_0, end_mask = var_1062_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1062_cast_fp16")]; + tensor V_for_attn_1_begin_0 = const()[name = string("V_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_1_end_0 = const()[name = string("V_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor 
V_for_attn_1_end_mask_0 = const()[name = string("V_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_1_cast_fp16 = slice_by_index(begin = V_for_attn_1_begin_0, end = V_for_attn_1_end_0, end_mask = V_for_attn_1_end_mask_0, x = var_1062_cast_fp16)[name = string("V_for_attn_1_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_for_attn_1_cast_fp16)[name = string("transpose_233")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_232")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_for_attn_1_cast_fp16)[name = string("transpose_231")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor 
concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_230")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_50_cast_fp16 = transpose(perm = transpose_50_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_229")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_11_cast_fp16, y = transpose_50_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_7_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_7_cast_fp16)[name = string("reduce_max_0")]; + tensor var_1097 = sub(x = x_7_cast_fp16, y = reduce_max_0)[name = string("op_1097")]; + tensor var_1103 = exp(x = var_1097)[name = string("op_1103")]; + tensor var_1113_axes_0 = const()[name = 
string("op_1113_axes_0"), val = tensor([-1])]; + bool var_1113_keep_dims_0 = const()[name = string("op_1113_keep_dims_0"), val = bool(true)]; + tensor var_1113 = reduce_sum(axes = var_1113_axes_0, keep_dims = var_1113_keep_dims_0, x = var_1103)[name = string("op_1113")]; + tensor var_1119_cast_fp16 = real_div(x = var_1103, y = var_1113)[name = string("op_1119_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_228")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_1119_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_1130 = const()[name = string("op_1130"), val = tensor([0, 2, 1, 3])]; + tensor var_1137 = const()[name = string("op_1137"), val = tensor([1, 3, -1])]; + tensor var_1131_cast_fp16 = transpose(perm = var_1130, x = attn_output_1_cast_fp16)[name = string("transpose_227")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_1137, x = var_1131_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_1142 = const()[name = string("op_1142"), val = tensor([0, 2, 1])]; + string var_1158_pad_type_0 = const()[name = string("op_1158_pad_type_0"), val = string("valid")]; + int32 var_1158_groups_0 = const()[name = string("op_1158_groups_0"), val = int32(1)]; + tensor var_1158_strides_0 = const()[name = string("op_1158_strides_0"), val = tensor([1])]; + tensor var_1158_pad_0 = const()[name = string("op_1158_pad_0"), val = tensor([0, 0])]; + tensor var_1158_dilations_0 = const()[name = string("op_1158_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices 
= tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531256512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533878016))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1143_cast_fp16 = transpose(perm = var_1142, x = attn_output_3_cast_fp16)[name = string("transpose_226")]; + tensor var_1158_cast_fp16 = conv(dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1143_cast_fp16)[name = string("op_1158_cast_fp16")]; + tensor var_1162 = const()[name = string("op_1162"), val = tensor([0, 2, 1])]; + int32 var_1168 = const()[name = string("op_1168"), val = int32(-1)]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_11_cast_fp16 = transpose(perm = var_1162, x = var_1158_cast_fp16)[name = string("transpose_225")]; + tensor var_1170_cast_fp16 = mul(x = x_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_1170_cast_fp16")]; + bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)]; + tensor input_15_cast_fp16 = concat(axis = var_1168, interleave = input_15_interleave_0, values = (x_11_cast_fp16, var_1170_cast_fp16))[name = string("input_15_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_1165_to_fp16 = const()[name = string("op_1165_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_1165_to_fp16, x = input_15_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_1175_split_sizes_0 = const()[name = string("op_1175_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1175_axis_0 = const()[name = string("op_1175_axis_0"), val = int32(-1)]; + tensor 
var_1175_cast_fp16_0, tensor var_1175_cast_fp16_1 = split(axis = var_1175_axis_0, split_sizes = var_1175_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_1175_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533880640)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_1175_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_13_cast_fp16")]; + int32 var_1184 = const()[name = string("op_1184"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1186_cast_fp16 = mul(x = x_13_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_1186_cast_fp16")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17_cast_fp16 = concat(axis = var_1184, interleave = input_17_interleave_0, values = (x_13_cast_fp16, var_1186_cast_fp16))[name = string("input_17_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_1181_to_fp16 = const()[name = string("op_1181_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1181_to_fp16, x = input_17_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor var_1191_split_sizes_0 = const()[name = string("op_1191_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1191_axis_0 = const()[name = string("op_1191_axis_0"), val = int32(-1)]; + tensor var_1191_cast_fp16_0, tensor var_1191_cast_fp16_1 = split(axis = var_1191_axis_0, split_sizes = var_1191_split_sizes_0, x = normed_17_cast_fp16)[name = 
string("op_1191_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533885824)))]; + tensor h_3_cast_fp16 = mul(x = var_1191_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_1202 = const()[name = string("op_1202"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_1203 = transpose(perm = var_1202, x = h_3_cast_fp16)[name = string("transpose_224")]; + tensor input_19 = expand_dims(axes = input_19_axes_0, x = var_1203)[name = string("input_19")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_19)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = 
up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_19)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_21 = mul(x = gate_3, y = up_1)[name = string("input_21")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_21)[name = string("mlp_out_1")]; + tensor var_1243_axes_0 = const()[name = string("op_1243_axes_0"), val = tensor([2])]; + tensor var_1243 = squeeze(axes = var_1243_axes_0, x = mlp_out_1)[name = string("op_1243")]; + tensor var_1247 = const()[name = string("op_1247"), val = tensor([0, 2, 1])]; + int32 var_1253 = const()[name = string("op_1253"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor x_15 = transpose(perm = var_1247, x = var_1243)[name = string("transpose_223")]; + tensor var_1255 = mul(x = x_15, y = const_9_promoted)[name = string("op_1255")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23 = concat(axis = var_1253, interleave = 
input_23_interleave_0, values = (x_15, var_1255))[name = string("input_23")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_1250_to_fp16 = const()[name = string("op_1250_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_1250_to_fp16, x = input_23)[name = string("normed_21_cast_fp16")]; + tensor var_1260_split_sizes_0 = const()[name = string("op_1260_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1260_axis_0 = const()[name = string("op_1260_axis_0"), val = int32(-1)]; + tensor var_1260_0, tensor var_1260_1 = split(axis = var_1260_axis_0, split_sizes = var_1260_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_1260")]; + tensor hidden_states_3 = mul(x = var_1260_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_13_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 3072])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 3, 3328])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_1288 = const()[name = string("op_1288"), val = tensor([0, 2, 1])]; + tensor input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor([2])]; + tensor var_1289 = transpose(perm = var_1288, x = hidden_states_5_cast_fp16)[name = string("transpose_222")]; + tensor input_25 = expand_dims(axes = input_25_axes_0, x = var_1289)[name = string("input_25")]; + 
string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_25)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_1308 = const()[name = string("op_1308"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_1309_cast_fp16 = transpose(perm = var_1308, x = per_layer_slice_1_cast_fp16)[name = string("transpose_221")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_1309_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_27_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_27_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = 
string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533891008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534218752))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_27_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_1325_axes_0 = const()[name = string("op_1325_axes_0"), val = tensor([2])]; + tensor var_1325_cast_fp16 = squeeze(axes = var_1325_axes_0, x = gated_5_cast_fp16)[name = string("op_1325_cast_fp16")]; + tensor var_1329 = const()[name = string("op_1329"), val = tensor([0, 2, 1])]; + int32 var_1335 = const()[name = string("op_1335"), val = int32(-1)]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_17_cast_fp16 = transpose(perm = var_1329, x = var_1325_cast_fp16)[name = string("transpose_220")]; + tensor var_1337_cast_fp16 = mul(x = x_17_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1337_cast_fp16")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29_cast_fp16 = concat(axis = var_1335, interleave = input_29_interleave_0, values = (x_17_cast_fp16, var_1337_cast_fp16))[name = string("input_29_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_1332_to_fp16 = const()[name = string("op_1332_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1332_to_fp16, 
x = input_29_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_1342_split_sizes_0 = const()[name = string("op_1342_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1342_axis_0 = const()[name = string("op_1342_axis_0"), val = int32(-1)]; + tensor var_1342_cast_fp16_0, tensor var_1342_cast_fp16_1 = split(axis = var_1342_axis_0, split_sizes = var_1342_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1342_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534221376)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_1342_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = tensor([0x1.7ep-1])]; + tensor x_19_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_11_promoted_to_fp16)[name = string("x_19_cast_fp16")]; + int32 var_1357 = const()[name = string("op_1357"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1359_cast_fp16 = mul(x = x_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1359_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_1357, interleave = input_31_interleave_0, values = (x_19_cast_fp16, var_1359_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_1354_to_fp16 = const()[name = 
string("op_1354_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1354_to_fp16, x = input_31_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor var_1364_split_sizes_0 = const()[name = string("op_1364_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1364_axis_0 = const()[name = string("op_1364_axis_0"), val = int32(-1)]; + tensor var_1364_cast_fp16_0, tensor var_1364_cast_fp16_1 = split(axis = var_1364_axis_0, split_sizes = var_1364_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1364_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534226560)))]; + tensor h_7_cast_fp16 = mul(x = var_1364_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_1370 = const()[name = string("op_1370"), val = tensor([0, 2, 1])]; + tensor var_1373_axes_0 = const()[name = string("op_1373_axes_0"), val = tensor([2])]; + tensor var_1371_cast_fp16 = transpose(perm = var_1370, x = h_7_cast_fp16)[name = string("transpose_219")]; + tensor var_1373_cast_fp16 = expand_dims(axes = var_1373_axes_0, x = var_1371_cast_fp16)[name = string("op_1373_cast_fp16")]; + string q_13_pad_type_0 = const()[name = string("q_13_pad_type_0"), val = string("valid")]; + tensor q_13_strides_0 = const()[name = string("q_13_strides_0"), val = tensor([1, 1])]; + tensor q_13_pad_0 = const()[name = string("q_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_13_dilations_0 = const()[name = string("q_13_dilations_0"), val = tensor([1, 1])]; + int32 q_13_groups_0 = const()[name = string("q_13_groups_0"), val = int32(1)]; + tensor q_13 = conv(dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = 
layers_1_self_attn_q_proj_weight_palettized, x = var_1373_cast_fp16)[name = string("q_13")]; + tensor var_1394 = const()[name = string("op_1394"), val = tensor([1, 8, 256, 3])]; + tensor var_1395 = reshape(shape = var_1394, x = q_13)[name = string("op_1395")]; + tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_1418 = const()[name = string("op_1418"), val = tensor([3, 8, 256])]; + tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = var_1395)[name = string("transpose_218")]; + tensor x_21 = reshape(shape = var_1418, x = transpose_51)[name = string("x_21")]; + int32 var_1424 = const()[name = string("op_1424"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor var_1426 = mul(x = x_21, y = const_13_promoted)[name = string("op_1426")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35 = concat(axis = var_1424, interleave = input_35_interleave_0, values = (x_21, var_1426))[name = string("input_35")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1421_to_fp16 = const()[name = string("op_1421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1421_to_fp16, x = input_35)[name = string("normed_33_cast_fp16")]; + tensor var_1431_split_sizes_0 = const()[name = string("op_1431_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1431_axis_0 = const()[name = string("op_1431_axis_0"), val = int32(-1)]; + tensor var_1431_0, tensor var_1431_1 = split(axis = var_1431_axis_0, split_sizes = var_1431_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1431")]; + tensor q_17 = mul(x = var_1431_0, y = layers_1_self_attn_q_norm_weight)[name = string("q_17")]; + tensor var_1438 = const()[name = string("op_1438"), val = tensor([1, 3, 8, 256])]; + 
tensor var_1439 = reshape(shape = var_1438, x = q_17)[name = string("op_1439")]; + tensor var_1444 = const()[name = string("op_1444"), val = tensor([0, 2, 1, 3])]; + tensor q_19 = transpose(perm = var_1444, x = var_1439)[name = string("transpose_217")]; + tensor var_1446_cast_fp16 = mul(x = q_19, y = cos_s)[name = string("op_1446_cast_fp16")]; + tensor var_1447_split_sizes_0 = const()[name = string("op_1447_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1447_axis_0 = const()[name = string("op_1447_axis_0"), val = int32(-1)]; + tensor var_1447_0, tensor var_1447_1 = split(axis = var_1447_axis_0, split_sizes = var_1447_split_sizes_0, x = q_19)[name = string("op_1447")]; + fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; + tensor var_1449 = mul(x = var_1447_1, y = const_14_promoted)[name = string("op_1449")]; + int32 var_1451 = const()[name = string("op_1451"), val = int32(-1)]; + bool var_1452_interleave_0 = const()[name = string("op_1452_interleave_0"), val = bool(false)]; + tensor var_1452 = concat(axis = var_1451, interleave = var_1452_interleave_0, values = (var_1449, var_1447_0))[name = string("op_1452")]; + tensor var_1453_cast_fp16 = mul(x = var_1452, y = sin_s)[name = string("op_1453_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_1446_cast_fp16, y = var_1453_cast_fp16)[name = string("q_23_cast_fp16")]; + string k_7_pad_type_0 = const()[name = string("k_7_pad_type_0"), val = string("valid")]; + tensor k_7_strides_0 = const()[name = string("k_7_strides_0"), val = tensor([1, 1])]; + tensor k_7_pad_0 = const()[name = string("k_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_7_dilations_0 = const()[name = string("k_7_dilations_0"), val = tensor([1, 1])]; + int32 k_7_groups_0 = const()[name = string("k_7_groups_0"), val = int32(1)]; + tensor k_7 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = 
layers_1_self_attn_k_proj_weight_palettized, x = var_1373_cast_fp16)[name = string("k_7")]; + tensor var_1471 = const()[name = string("op_1471"), val = tensor([1, 2, 256, 3])]; + tensor var_1472 = reshape(shape = var_1471, x = k_7)[name = string("op_1472")]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_3_pad_type_0 = const()[name = string("v_3_pad_type_0"), val = string("valid")]; + tensor v_3_strides_0 = const()[name = string("v_3_strides_0"), val = tensor([1, 1])]; + tensor v_3_pad_0 = const()[name = string("v_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_3_dilations_0 = const()[name = string("v_3_dilations_0"), val = tensor([1, 1])]; + int32 v_3_groups_0 = const()[name = string("v_3_groups_0"), val = int32(1)]; + tensor v_3 = conv(dilations = v_3_dilations_0, groups = v_3_groups_0, pad = v_3_pad_0, pad_type = v_3_pad_type_0, strides = v_3_strides_0, weight = layers_1_self_attn_v_proj_weight_palettized, x = var_1373_cast_fp16)[name = string("v_3")]; + tensor var_1499 = const()[name = string("op_1499"), val = tensor([1, 2, 256, 3])]; + tensor var_1500 = reshape(shape = var_1499, x = v_3)[name = string("op_1500")]; + tensor var_1505 = const()[name = string("op_1505"), val = tensor([0, 1, 3, 2])]; + tensor var_1523 = const()[name = string("op_1523"), val = tensor([3, 2, 256])]; + tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = var_1472)[name = string("transpose_216")]; + tensor x_23 = reshape(shape = var_1523, x = transpose_52)[name = string("x_23")]; + int32 var_1529 = const()[name = string("op_1529"), val = int32(-1)]; + fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; + tensor var_1531 = mul(x = x_23, y = const_15_promoted)[name = string("op_1531")]; + bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)]; + tensor input_37 = concat(axis = var_1529, interleave = input_37_interleave_0, 
values = (x_23, var_1531))[name = string("input_37")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1526_to_fp16 = const()[name = string("op_1526_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1526_to_fp16, x = input_37)[name = string("normed_37_cast_fp16")]; + tensor var_1536_split_sizes_0 = const()[name = string("op_1536_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1536_axis_0 = const()[name = string("op_1536_axis_0"), val = int32(-1)]; + tensor var_1536_0, tensor var_1536_1 = split(axis = var_1536_axis_0, split_sizes = var_1536_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1536")]; + tensor k_11 = mul(x = var_1536_0, y = layers_1_self_attn_k_norm_weight)[name = string("k_11")]; + tensor var_1543 = const()[name = string("op_1543"), val = tensor([1, 3, 2, 256])]; + tensor var_1544 = reshape(shape = var_1543, x = k_11)[name = string("op_1544")]; + tensor var_1549 = const()[name = string("op_1549"), val = tensor([0, 2, 1, 3])]; + fp16 var_1551_promoted = const()[name = string("op_1551_promoted"), val = fp16(0x1p+1)]; + tensor var_1506 = transpose(perm = var_1505, x = var_1500)[name = string("transpose_215")]; + tensor var_1552 = pow(x = var_1506, y = var_1551_promoted)[name = string("op_1552")]; + tensor var_1557_axes_0 = const()[name = string("op_1557_axes_0"), val = tensor([-1])]; + bool var_1557_keep_dims_0 = const()[name = string("op_1557_keep_dims_0"), val = bool(true)]; + tensor var_1557 = reduce_mean(axes = var_1557_axes_0, keep_dims = var_1557_keep_dims_0, x = var_1552)[name = string("op_1557")]; + fp16 var_1559_to_fp16 = const()[name = string("op_1559_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_3_cast_fp16 = add(x = var_1557, y = var_1559_to_fp16)[name = string("mean_sq_3_cast_fp16")]; + fp32 var_1561_epsilon_0 = const()[name = string("op_1561_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor 
var_1561_cast_fp16 = rsqrt(epsilon = var_1561_epsilon_0, x = mean_sq_3_cast_fp16)[name = string("op_1561_cast_fp16")]; + tensor input_41_cast_fp16 = mul(x = var_1506, y = var_1561_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor q_21 = transpose(perm = var_1549, x = var_1544)[name = string("transpose_214")]; + tensor var_1563_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1563_cast_fp16")]; + tensor var_1564_split_sizes_0 = const()[name = string("op_1564_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1564_axis_0 = const()[name = string("op_1564_axis_0"), val = int32(-1)]; + tensor var_1564_0, tensor var_1564_1 = split(axis = var_1564_axis_0, split_sizes = var_1564_split_sizes_0, x = q_21)[name = string("op_1564")]; + fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; + tensor var_1566 = mul(x = var_1564_1, y = const_16_promoted)[name = string("op_1566")]; + int32 var_1568 = const()[name = string("op_1568"), val = int32(-1)]; + bool var_1569_interleave_0 = const()[name = string("op_1569_interleave_0"), val = bool(false)]; + tensor var_1569 = concat(axis = var_1568, interleave = var_1569_interleave_0, values = (var_1566, var_1564_0))[name = string("op_1569")]; + tensor var_1570_cast_fp16 = mul(x = var_1569, y = sin_s)[name = string("op_1570_cast_fp16")]; + tensor input_39_cast_fp16 = add(x = var_1563_cast_fp16, y = var_1570_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor k_padded_3_pad_0 = const()[name = string("k_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; + fp16 const_17_to_fp16 = const()[name = string("const_17_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_3_cast_fp16 = pad(constant_val = const_17_to_fp16, mode = k_padded_3_mode_0, pad = k_padded_3_pad_0, x = input_39_cast_fp16)[name = string("k_padded_3_cast_fp16")]; + tensor v_padded_3_pad_0 = const()[name = 
string("v_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; + fp16 const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_3_cast_fp16 = pad(constant_val = const_18_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = input_41_cast_fp16)[name = string("v_padded_3_cast_fp16")]; + tensor slot_k_3_begin_0 = const()[name = string("slot_k_3_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_k_3_end_0 = const()[name = string("slot_k_3_end_0"), val = tensor([2, 2, 512, 512])]; + tensor slot_k_3_end_mask_0 = const()[name = string("slot_k_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_3_cast_fp16 = slice_by_index(begin = slot_k_3_begin_0, end = slot_k_3_end_0, end_mask = slot_k_3_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("slot_k_3_cast_fp16")]; + tensor slot_v_3_begin_0 = const()[name = string("slot_v_3_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_v_3_end_0 = const()[name = string("slot_v_3_end_0"), val = tensor([2, 2, 512, 512])]; + tensor slot_v_3_end_mask_0 = const()[name = string("slot_v_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_3_cast_fp16 = slice_by_index(begin = slot_v_3_begin_0, end = slot_v_3_end_0, end_mask = slot_v_3_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("slot_v_3_cast_fp16")]; + tensor var_1609_begin_0 = const()[name = string("op_1609_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1609_end_0 = const()[name = string("op_1609_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1609_end_mask_0 = const()[name = string("op_1609_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1609_cast_fp16 = slice_by_index(begin = var_1609_begin_0, end = var_1609_end_0, end_mask = var_1609_end_mask_0, x = slot_k_3_cast_fp16)[name = string("op_1609_cast_fp16")]; + int32 var_1616 = 
const()[name = string("op_1616"), val = int32(2)]; + bool new_k_3_interleave_0 = const()[name = string("new_k_3_interleave_0"), val = bool(false)]; + tensor new_k_3_cast_fp16 = concat(axis = var_1616, interleave = new_k_3_interleave_0, values = (var_1609_cast_fp16, k_padded_3_cast_fp16))[name = string("new_k_3_cast_fp16")]; + tensor var_1632_begin_0 = const()[name = string("op_1632_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_1632_end_0 = const()[name = string("op_1632_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1632_end_mask_0 = const()[name = string("op_1632_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1632_cast_fp16 = slice_by_index(begin = var_1632_begin_0, end = var_1632_end_0, end_mask = var_1632_end_mask_0, x = slot_v_3_cast_fp16)[name = string("op_1632_cast_fp16")]; + int32 var_1639 = const()[name = string("op_1639"), val = int32(2)]; + bool new_v_3_interleave_0 = const()[name = string("new_v_3_interleave_0"), val = bool(false)]; + tensor new_v_3_cast_fp16 = concat(axis = var_1639, interleave = new_v_3_interleave_0, values = (var_1632_cast_fp16, v_padded_3_cast_fp16))[name = string("new_v_3_cast_fp16")]; + tensor var_1650_begin_0 = const()[name = string("op_1650_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1650_end_0 = const()[name = string("op_1650_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1650_end_mask_0 = const()[name = string("op_1650_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1650_cast_fp16 = slice_by_index(begin = var_1650_begin_0, end = var_1650_end_0, end_mask = var_1650_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1650_cast_fp16")]; + int32 var_1652 = const()[name = string("op_1652"), val = int32(0)]; + bool K_sliding_out_3_interleave_0 = const()[name = string("K_sliding_out_3_interleave_0"), val = bool(false)]; + tensor K_sliding_out_3_cast_fp16 = concat(axis = var_1652, interleave = K_sliding_out_3_interleave_0, values = (var_1052_cast_fp16, 
new_k_3_cast_fp16, var_1650_cast_fp16))[name = string("K_sliding_out_3_cast_fp16")]; + tensor var_1663_begin_0 = const()[name = string("op_1663_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_1663_end_0 = const()[name = string("op_1663_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_1663_end_mask_0 = const()[name = string("op_1663_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1663_cast_fp16 = slice_by_index(begin = var_1663_begin_0, end = var_1663_end_0, end_mask = var_1663_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1663_cast_fp16")]; + int32 var_1665 = const()[name = string("op_1665"), val = int32(0)]; + bool V_sliding_out_3_interleave_0 = const()[name = string("V_sliding_out_3_interleave_0"), val = bool(false)]; + tensor V_sliding_out_3_cast_fp16 = concat(axis = var_1665, interleave = V_sliding_out_3_interleave_0, values = (var_1062_cast_fp16, new_v_3_cast_fp16, var_1663_cast_fp16))[name = string("V_sliding_out_3_cast_fp16")]; + tensor var_1671_begin_0 = const()[name = string("op_1671_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1671_end_0 = const()[name = string("op_1671_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1671_end_mask_0 = const()[name = string("op_1671_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1671_cast_fp16 = slice_by_index(begin = var_1671_begin_0, end = var_1671_end_0, end_mask = var_1671_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("op_1671_cast_fp16")]; + tensor K_for_attn_3_begin_0 = const()[name = string("K_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_3_end_0 = const()[name = string("K_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_3_end_mask_0 = const()[name = string("K_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_3_cast_fp16 = slice_by_index(begin = K_for_attn_3_begin_0, end = K_for_attn_3_end_0, end_mask = K_for_attn_3_end_mask_0, x = 
var_1671_cast_fp16)[name = string("K_for_attn_3_cast_fp16")]; + tensor var_1681_begin_0 = const()[name = string("op_1681_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1681_end_0 = const()[name = string("op_1681_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1681_end_mask_0 = const()[name = string("op_1681_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1681_cast_fp16 = slice_by_index(begin = var_1681_begin_0, end = var_1681_end_0, end_mask = var_1681_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("op_1681_cast_fp16")]; + tensor V_for_attn_3_begin_0 = const()[name = string("V_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_3_end_0 = const()[name = string("V_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_3_end_mask_0 = const()[name = string("V_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_3_cast_fp16 = slice_by_index(begin = V_for_attn_3_begin_0, end = V_for_attn_3_end_0, end_mask = V_for_attn_3_end_mask_0, x = var_1681_cast_fp16)[name = string("V_for_attn_3_cast_fp16")]; + tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_for_attn_3_cast_fp16)[name = string("transpose_213")]; + tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_4, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; + tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 512, 256])]; + tensor 
transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_212")]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_5, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_for_attn_3_cast_fp16)[name = string("transpose_211")]; + tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_6, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_210")]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_7, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; + tensor V_expanded_3_perm_0 = const()[name = string("V_expanded_3_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_209")]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = 
attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_23_cast_fp16, y = transpose_53_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_27_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_27_cast_fp16)[name = string("reduce_max_1")]; + tensor var_1716 = sub(x = x_27_cast_fp16, y = reduce_max_1)[name = string("op_1716")]; + tensor var_1722 = exp(x = var_1716)[name = string("op_1722")]; + tensor var_1732_axes_0 = const()[name = string("op_1732_axes_0"), val = tensor([-1])]; + bool var_1732_keep_dims_0 = const()[name = string("op_1732_keep_dims_0"), val = bool(true)]; + tensor var_1732 = reduce_sum(axes = var_1732_axes_0, keep_dims = var_1732_keep_dims_0, x = var_1722)[name = string("op_1732")]; + tensor var_1738_cast_fp16 = real_div(x = var_1722, y = var_1732)[name = string("op_1738_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor V_expanded_3_cast_fp16 = transpose(perm = V_expanded_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_208")]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_1738_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_1749 = const()[name = string("op_1749"), val = tensor([0, 2, 1, 3])]; + tensor var_1756 = const()[name = string("op_1756"), val = tensor([1, 3, -1])]; + tensor var_1750_cast_fp16 = transpose(perm = 
var_1749, x = attn_output_7_cast_fp16)[name = string("transpose_207")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_1756, x = var_1750_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_1761 = const()[name = string("op_1761"), val = tensor([0, 2, 1])]; + string var_1777_pad_type_0 = const()[name = string("op_1777_pad_type_0"), val = string("valid")]; + int32 var_1777_groups_0 = const()[name = string("op_1777_groups_0"), val = int32(1)]; + tensor var_1777_strides_0 = const()[name = string("op_1777_strides_0"), val = tensor([1])]; + tensor var_1777_pad_0 = const()[name = string("op_1777_pad_0"), val = tensor([0, 0])]; + tensor var_1777_dilations_0 = const()[name = string("op_1777_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534231744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536853248))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1762_cast_fp16 = transpose(perm = var_1761, x = attn_output_9_cast_fp16)[name = string("transpose_206")]; + tensor var_1777_cast_fp16 = conv(dilations = var_1777_dilations_0, groups = var_1777_groups_0, pad = var_1777_pad_0, pad_type = var_1777_pad_type_0, strides = var_1777_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1762_cast_fp16)[name = string("op_1777_cast_fp16")]; + tensor var_1781 = const()[name = string("op_1781"), val = tensor([0, 2, 1])]; + int32 var_1787 = const()[name = string("op_1787"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_31_cast_fp16 = transpose(perm = var_1781, x = var_1777_cast_fp16)[name = string("transpose_205")]; + tensor var_1789_cast_fp16 = mul(x = x_31_cast_fp16, y = const_19_promoted_to_fp16)[name = 
string("op_1789_cast_fp16")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45_cast_fp16 = concat(axis = var_1787, interleave = input_45_interleave_0, values = (x_31_cast_fp16, var_1789_cast_fp16))[name = string("input_45_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1784_to_fp16 = const()[name = string("op_1784_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1784_to_fp16, x = input_45_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor var_1794_split_sizes_0 = const()[name = string("op_1794_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1794_axis_0 = const()[name = string("op_1794_axis_0"), val = int32(-1)]; + tensor var_1794_cast_fp16_0, tensor var_1794_cast_fp16_1 = split(axis = var_1794_axis_0, split_sizes = var_1794_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1794_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536855872)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1794_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_19_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_33_cast_fp16")]; + int32 var_1803 = const()[name = string("op_1803"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1805_cast_fp16 = mul(x = x_33_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1805_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor 
input_47_cast_fp16 = concat(axis = var_1803, interleave = input_47_interleave_0, values = (x_33_cast_fp16, var_1805_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1800_to_fp16 = const()[name = string("op_1800_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1800_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1810_split_sizes_0 = const()[name = string("op_1810_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1810_axis_0 = const()[name = string("op_1810_axis_0"), val = int32(-1)]; + tensor var_1810_cast_fp16_0, tensor var_1810_cast_fp16_1 = split(axis = var_1810_axis_0, split_sizes = var_1810_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1810_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536861056)))]; + tensor h_9_cast_fp16 = mul(x = var_1810_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1821 = const()[name = string("op_1821"), val = tensor([0, 2, 1])]; + tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; + tensor var_1822 = transpose(perm = var_1821, x = h_9_cast_fp16)[name = string("transpose_204")]; + tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_1822)[name = string("input_49")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = 
string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_49)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_49)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_51 = mul(x = gate_7, y = up_3)[name = string("input_51")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, 
weight = layers_1_mlp_down_proj_weight_palettized, x = input_51)[name = string("mlp_out_3")]; + tensor var_1862_axes_0 = const()[name = string("op_1862_axes_0"), val = tensor([2])]; + tensor var_1862 = squeeze(axes = var_1862_axes_0, x = mlp_out_3)[name = string("op_1862")]; + tensor var_1866 = const()[name = string("op_1866"), val = tensor([0, 2, 1])]; + int32 var_1872 = const()[name = string("op_1872"), val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_35 = transpose(perm = var_1866, x = var_1862)[name = string("transpose_203")]; + tensor var_1874 = mul(x = x_35, y = const_21_promoted)[name = string("op_1874")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1872, interleave = input_53_interleave_0, values = (x_35, var_1874))[name = string("input_53")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1869_to_fp16 = const()[name = string("op_1869_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1869_to_fp16, x = input_53)[name = string("normed_49_cast_fp16")]; + tensor var_1879_split_sizes_0 = const()[name = string("op_1879_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1879_axis_0 = const()[name = string("op_1879_axis_0"), val = int32(-1)]; + tensor var_1879_0, tensor var_1879_1 = split(axis = var_1879_axis_0, split_sizes = var_1879_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1879")]; + tensor hidden_states_13 = mul(x = var_1879_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_33_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 3328])]; + 
tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 3, 3584])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1907 = const()[name = string("op_1907"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_1908 = transpose(perm = var_1907, x = hidden_states_15_cast_fp16)[name = string("transpose_202")]; + tensor input_55 = expand_dims(axes = input_55_axes_0, x = var_1908)[name = string("input_55")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_55)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1927 = const()[name = string("op_1927"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor 
var_1928_cast_fp16 = transpose(perm = var_1927, x = per_layer_slice_3_cast_fp16)[name = string("transpose_201")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1928_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_57_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536866240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537193984))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1944_axes_0 = const()[name = string("op_1944_axes_0"), val = tensor([2])]; + tensor var_1944_cast_fp16 = squeeze(axes = var_1944_axes_0, x = gated_11_cast_fp16)[name = string("op_1944_cast_fp16")]; + tensor var_1948 = const()[name = string("op_1948"), val = tensor([0, 2, 1])]; + int32 var_1954 = const()[name = string("op_1954"), val = int32(-1)]; + fp16 const_22_promoted_to_fp16 = 
const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_37_cast_fp16 = transpose(perm = var_1948, x = var_1944_cast_fp16)[name = string("transpose_200")]; + tensor var_1956_cast_fp16 = mul(x = x_37_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1956_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_1954, interleave = input_59_interleave_0, values = (x_37_cast_fp16, var_1956_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1951_to_fp16 = const()[name = string("op_1951_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1951_to_fp16, x = input_59_cast_fp16)[name = string("normed_53_cast_fp16")]; + tensor var_1961_split_sizes_0 = const()[name = string("op_1961_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1961_axis_0 = const()[name = string("op_1961_axis_0"), val = int32(-1)]; + tensor var_1961_cast_fp16_0, tensor var_1961_cast_fp16_1 = split(axis = var_1961_axis_0, split_sizes = var_1961_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1961_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537196608)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1961_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = 
tensor([0x1.6cp-1])]; + tensor x_39_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_39_cast_fp16")]; + int32 var_1976 = const()[name = string("op_1976"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1978_cast_fp16 = mul(x = x_39_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1978_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_1976, interleave = input_61_interleave_0, values = (x_39_cast_fp16, var_1978_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1973_to_fp16 = const()[name = string("op_1973_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1973_to_fp16, x = input_61_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1983_split_sizes_0 = const()[name = string("op_1983_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1983_axis_0 = const()[name = string("op_1983_axis_0"), val = int32(-1)]; + tensor var_1983_cast_fp16_0, tensor var_1983_cast_fp16_1 = split(axis = var_1983_axis_0, split_sizes = var_1983_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1983_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537201792)))]; + tensor h_13_cast_fp16 = mul(x = var_1983_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1989 = const()[name = string("op_1989"), val = tensor([0, 2, 1])]; + tensor var_1992_axes_0 = const()[name = string("op_1992_axes_0"), val = 
tensor([2])]; + tensor var_1990_cast_fp16 = transpose(perm = var_1989, x = h_13_cast_fp16)[name = string("transpose_199")]; + tensor var_1992_cast_fp16 = expand_dims(axes = var_1992_axes_0, x = var_1990_cast_fp16)[name = string("op_1992_cast_fp16")]; + string q_25_pad_type_0 = const()[name = string("q_25_pad_type_0"), val = string("valid")]; + tensor q_25_strides_0 = const()[name = string("q_25_strides_0"), val = tensor([1, 1])]; + tensor q_25_pad_0 = const()[name = string("q_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_25_dilations_0 = const()[name = string("q_25_dilations_0"), val = tensor([1, 1])]; + int32 q_25_groups_0 = const()[name = string("q_25_groups_0"), val = int32(1)]; + tensor q_25 = conv(dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1992_cast_fp16)[name = string("q_25")]; + tensor var_2013 = const()[name = string("op_2013"), val = tensor([1, 8, 256, 3])]; + tensor var_2014 = reshape(shape = var_2013, x = q_25)[name = string("op_2014")]; + tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2037 = const()[name = string("op_2037"), val = tensor([3, 8, 256])]; + tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = var_2014)[name = string("transpose_198")]; + tensor x_41 = reshape(shape = var_2037, x = transpose_54)[name = string("x_41")]; + int32 var_2043 = const()[name = string("op_2043"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_2045 = mul(x = x_41, y = const_25_promoted)[name = string("op_2045")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_2043, interleave = input_65_interleave_0, values = (x_41, var_2045))[name = string("input_65")]; + tensor 
normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_2040_to_fp16 = const()[name = string("op_2040_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_2040_to_fp16, x = input_65)[name = string("normed_61_cast_fp16")]; + tensor var_2050_split_sizes_0 = const()[name = string("op_2050_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2050_axis_0 = const()[name = string("op_2050_axis_0"), val = int32(-1)]; + tensor var_2050_0, tensor var_2050_1 = split(axis = var_2050_axis_0, split_sizes = var_2050_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_2050")]; + tensor q_29 = mul(x = var_2050_0, y = layers_2_self_attn_q_norm_weight)[name = string("q_29")]; + tensor var_2057 = const()[name = string("op_2057"), val = tensor([1, 3, 8, 256])]; + tensor var_2058 = reshape(shape = var_2057, x = q_29)[name = string("op_2058")]; + tensor var_2063 = const()[name = string("op_2063"), val = tensor([0, 2, 1, 3])]; + tensor q_31 = transpose(perm = var_2063, x = var_2058)[name = string("transpose_197")]; + tensor var_2065_cast_fp16 = mul(x = q_31, y = cos_s)[name = string("op_2065_cast_fp16")]; + tensor var_2066_split_sizes_0 = const()[name = string("op_2066_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2066_axis_0 = const()[name = string("op_2066_axis_0"), val = int32(-1)]; + tensor var_2066_0, tensor var_2066_1 = split(axis = var_2066_axis_0, split_sizes = var_2066_split_sizes_0, x = q_31)[name = string("op_2066")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_2068 = mul(x = var_2066_1, y = const_26_promoted)[name = string("op_2068")]; + int32 var_2070 = const()[name = string("op_2070"), val = int32(-1)]; + bool var_2071_interleave_0 = const()[name = string("op_2071_interleave_0"), val = bool(false)]; + tensor var_2071 = concat(axis = var_2070, interleave = var_2071_interleave_0, values = 
(var_2068, var_2066_0))[name = string("op_2071")]; + tensor var_2072_cast_fp16 = mul(x = var_2071, y = sin_s)[name = string("op_2072_cast_fp16")]; + tensor q_35_cast_fp16 = add(x = var_2065_cast_fp16, y = var_2072_cast_fp16)[name = string("q_35_cast_fp16")]; + string k_13_pad_type_0 = const()[name = string("k_13_pad_type_0"), val = string("valid")]; + tensor k_13_strides_0 = const()[name = string("k_13_strides_0"), val = tensor([1, 1])]; + tensor k_13_pad_0 = const()[name = string("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_13_dilations_0 = const()[name = string("k_13_dilations_0"), val = tensor([1, 1])]; + int32 k_13_groups_0 = const()[name = string("k_13_groups_0"), val = int32(1)]; + tensor k_13 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = layers_2_self_attn_k_proj_weight_palettized, x = var_1992_cast_fp16)[name = string("k_13")]; + tensor var_2090 = const()[name = string("op_2090"), val = tensor([1, 2, 256, 3])]; + tensor var_2091 = reshape(shape = var_2090, x = k_13)[name = string("op_2091")]; + tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_5_pad_type_0 = const()[name = string("v_5_pad_type_0"), val = string("valid")]; + tensor v_5_strides_0 = const()[name = string("v_5_strides_0"), val = tensor([1, 1])]; + tensor v_5_pad_0 = const()[name = string("v_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_5_dilations_0 = const()[name = string("v_5_dilations_0"), val = tensor([1, 1])]; + int32 v_5_groups_0 = const()[name = string("v_5_groups_0"), val = int32(1)]; + tensor v_5 = conv(dilations = v_5_dilations_0, groups = v_5_groups_0, pad = v_5_pad_0, pad_type = v_5_pad_type_0, strides = v_5_strides_0, weight = layers_2_self_attn_v_proj_weight_palettized, x = var_1992_cast_fp16)[name = string("v_5")]; + tensor var_2118 = const()[name = string("op_2118"), val = tensor([1, 2, 256, 3])]; + tensor 
var_2119 = reshape(shape = var_2118, x = v_5)[name = string("op_2119")]; + tensor var_2124 = const()[name = string("op_2124"), val = tensor([0, 1, 3, 2])]; + tensor var_2142 = const()[name = string("op_2142"), val = tensor([3, 2, 256])]; + tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = var_2091)[name = string("transpose_196")]; + tensor x_43 = reshape(shape = var_2142, x = transpose_55)[name = string("x_43")]; + int32 var_2148 = const()[name = string("op_2148"), val = int32(-1)]; + fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; + tensor var_2150 = mul(x = x_43, y = const_27_promoted)[name = string("op_2150")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_2148, interleave = input_67_interleave_0, values = (x_43, var_2150))[name = string("input_67")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_2145_to_fp16 = const()[name = string("op_2145_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_2145_to_fp16, x = input_67)[name = string("normed_65_cast_fp16")]; + tensor var_2155_split_sizes_0 = const()[name = string("op_2155_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2155_axis_0 = const()[name = string("op_2155_axis_0"), val = int32(-1)]; + tensor var_2155_0, tensor var_2155_1 = split(axis = var_2155_axis_0, split_sizes = var_2155_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_2155")]; + tensor k_17 = mul(x = var_2155_0, y = layers_2_self_attn_k_norm_weight)[name = string("k_17")]; + tensor var_2162 = const()[name = string("op_2162"), val = tensor([1, 3, 2, 256])]; + tensor var_2163 = reshape(shape = var_2162, x = k_17)[name = string("op_2163")]; + tensor var_2168 = const()[name = string("op_2168"), val = tensor([0, 2, 1, 3])]; + fp16 var_2170_promoted = const()[name = 
string("op_2170_promoted"), val = fp16(0x1p+1)]; + tensor var_2125 = transpose(perm = var_2124, x = var_2119)[name = string("transpose_195")]; + tensor var_2171 = pow(x = var_2125, y = var_2170_promoted)[name = string("op_2171")]; + tensor var_2176_axes_0 = const()[name = string("op_2176_axes_0"), val = tensor([-1])]; + bool var_2176_keep_dims_0 = const()[name = string("op_2176_keep_dims_0"), val = bool(true)]; + tensor var_2176 = reduce_mean(axes = var_2176_axes_0, keep_dims = var_2176_keep_dims_0, x = var_2171)[name = string("op_2176")]; + fp16 var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_5_cast_fp16 = add(x = var_2176, y = var_2178_to_fp16)[name = string("mean_sq_5_cast_fp16")]; + fp32 var_2180_epsilon_0 = const()[name = string("op_2180_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2180_cast_fp16 = rsqrt(epsilon = var_2180_epsilon_0, x = mean_sq_5_cast_fp16)[name = string("op_2180_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_2125, y = var_2180_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor q_33 = transpose(perm = var_2168, x = var_2163)[name = string("transpose_194")]; + tensor var_2182_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2182_cast_fp16")]; + tensor var_2183_split_sizes_0 = const()[name = string("op_2183_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2183_axis_0 = const()[name = string("op_2183_axis_0"), val = int32(-1)]; + tensor var_2183_0, tensor var_2183_1 = split(axis = var_2183_axis_0, split_sizes = var_2183_split_sizes_0, x = q_33)[name = string("op_2183")]; + fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; + tensor var_2185 = mul(x = var_2183_1, y = const_28_promoted)[name = string("op_2185")]; + int32 var_2187 = const()[name = string("op_2187"), val = int32(-1)]; + bool var_2188_interleave_0 = const()[name = string("op_2188_interleave_0"), val = bool(false)]; + tensor var_2188 = concat(axis = 
var_2187, interleave = var_2188_interleave_0, values = (var_2185, var_2183_0))[name = string("op_2188")]; + tensor var_2189_cast_fp16 = mul(x = var_2188, y = sin_s)[name = string("op_2189_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_2182_cast_fp16, y = var_2189_cast_fp16)[name = string("input_69_cast_fp16")]; + tensor k_padded_5_pad_0 = const()[name = string("k_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; + fp16 const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_5_cast_fp16 = pad(constant_val = const_29_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_69_cast_fp16)[name = string("k_padded_5_cast_fp16")]; + tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; + fp16 const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_5_cast_fp16 = pad(constant_val = const_30_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = input_71_cast_fp16)[name = string("v_padded_5_cast_fp16")]; + tensor slot_k_5_begin_0 = const()[name = string("slot_k_5_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor slot_k_5_end_0 = const()[name = string("slot_k_5_end_0"), val = tensor([3, 2, 512, 512])]; + tensor slot_k_5_end_mask_0 = const()[name = string("slot_k_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_5_cast_fp16 = slice_by_index(begin = slot_k_5_begin_0, end = slot_k_5_end_0, end_mask = slot_k_5_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("slot_k_5_cast_fp16")]; + tensor slot_v_5_begin_0 = const()[name = string("slot_v_5_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor slot_v_5_end_0 = const()[name = string("slot_v_5_end_0"), val = tensor([3, 2, 
512, 512])]; + tensor slot_v_5_end_mask_0 = const()[name = string("slot_v_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_5_cast_fp16 = slice_by_index(begin = slot_v_5_begin_0, end = slot_v_5_end_0, end_mask = slot_v_5_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("slot_v_5_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = string("op_2228_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2228_end_0 = const()[name = string("op_2228_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2228_end_mask_0 = const()[name = string("op_2228_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = slot_k_5_cast_fp16)[name = string("op_2228_cast_fp16")]; + int32 var_2235 = const()[name = string("op_2235"), val = int32(2)]; + bool new_k_5_interleave_0 = const()[name = string("new_k_5_interleave_0"), val = bool(false)]; + tensor new_k_5_cast_fp16 = concat(axis = var_2235, interleave = new_k_5_interleave_0, values = (var_2228_cast_fp16, k_padded_5_cast_fp16))[name = string("new_k_5_cast_fp16")]; + tensor var_2251_begin_0 = const()[name = string("op_2251_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2251_end_0 = const()[name = string("op_2251_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2251_end_mask_0 = const()[name = string("op_2251_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2251_cast_fp16 = slice_by_index(begin = var_2251_begin_0, end = var_2251_end_0, end_mask = var_2251_end_mask_0, x = slot_v_5_cast_fp16)[name = string("op_2251_cast_fp16")]; + int32 var_2258 = const()[name = string("op_2258"), val = int32(2)]; + bool new_v_5_interleave_0 = const()[name = string("new_v_5_interleave_0"), val = bool(false)]; + tensor new_v_5_cast_fp16 = concat(axis = var_2258, interleave = new_v_5_interleave_0, values = (var_2251_cast_fp16, v_padded_5_cast_fp16))[name = 
string("new_v_5_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = string("op_2264_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2264_end_0 = const()[name = string("op_2264_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_2264_end_mask_0 = const()[name = string("op_2264_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("op_2264_cast_fp16")]; + tensor var_2269_begin_0 = const()[name = string("op_2269_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2269_end_0 = const()[name = string("op_2269_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2269_end_mask_0 = const()[name = string("op_2269_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = var_2269_end_0, end_mask = var_2269_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("op_2269_cast_fp16")]; + int32 var_2271 = const()[name = string("op_2271"), val = int32(0)]; + bool K_sliding_out_5_interleave_0 = const()[name = string("K_sliding_out_5_interleave_0"), val = bool(false)]; + tensor K_sliding_out_5_cast_fp16 = concat(axis = var_2271, interleave = K_sliding_out_5_interleave_0, values = (var_2264_cast_fp16, new_k_5_cast_fp16, var_2269_cast_fp16))[name = string("K_sliding_out_5_cast_fp16")]; + tensor var_2277_begin_0 = const()[name = string("op_2277_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2277_end_0 = const()[name = string("op_2277_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_2277_end_mask_0 = const()[name = string("op_2277_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2277_cast_fp16 = slice_by_index(begin = var_2277_begin_0, end = var_2277_end_0, end_mask = var_2277_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("op_2277_cast_fp16")]; + tensor var_2282_begin_0 = const()[name 
= string("op_2282_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2282_end_0 = const()[name = string("op_2282_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2282_end_mask_0 = const()[name = string("op_2282_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2282_cast_fp16 = slice_by_index(begin = var_2282_begin_0, end = var_2282_end_0, end_mask = var_2282_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("op_2282_cast_fp16")]; + int32 var_2284 = const()[name = string("op_2284"), val = int32(0)]; + bool V_sliding_out_5_interleave_0 = const()[name = string("V_sliding_out_5_interleave_0"), val = bool(false)]; + tensor V_sliding_out_5_cast_fp16 = concat(axis = var_2284, interleave = V_sliding_out_5_interleave_0, values = (var_2277_cast_fp16, new_v_5_cast_fp16, var_2282_cast_fp16))[name = string("V_sliding_out_5_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = string("op_2290_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = string("op_2290_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2290_end_mask_0 = const()[name = string("op_2290_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2290_cast_fp16")]; + tensor K_for_attn_5_begin_0 = const()[name = string("K_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_5_end_0 = const()[name = string("K_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_5_end_mask_0 = const()[name = string("K_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_5_cast_fp16 = slice_by_index(begin = K_for_attn_5_begin_0, end = K_for_attn_5_end_0, end_mask = K_for_attn_5_end_mask_0, x = var_2290_cast_fp16)[name = string("K_for_attn_5_cast_fp16")]; + tensor var_2300_begin_0 = const()[name = string("op_2300_begin_0"), 
val = tensor([2, 0, 0, 0])]; + tensor var_2300_end_0 = const()[name = string("op_2300_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2300_end_mask_0 = const()[name = string("op_2300_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2300_cast_fp16 = slice_by_index(begin = var_2300_begin_0, end = var_2300_end_0, end_mask = var_2300_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2300_cast_fp16")]; + tensor V_for_attn_5_begin_0 = const()[name = string("V_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_5_end_0 = const()[name = string("V_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_5_end_mask_0 = const()[name = string("V_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_5_cast_fp16 = slice_by_index(begin = V_for_attn_5_begin_0, end = V_for_attn_5_end_0, end_mask = V_for_attn_5_end_mask_0, x = var_2300_cast_fp16)[name = string("V_for_attn_5_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_for_attn_5_cast_fp16)[name = string("transpose_193")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_192")]; + tensor reshape_9_cast_fp16 = 
reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_for_attn_5_cast_fp16)[name = string("transpose_191")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_190")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_189")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_35_cast_fp16, y = transpose_56_cast_fp16)[name = 
string("attn_weights_9_cast_fp16")]; + tensor x_47_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_47_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_47_cast_fp16)[name = string("reduce_max_2")]; + tensor var_2335 = sub(x = x_47_cast_fp16, y = reduce_max_2)[name = string("op_2335")]; + tensor var_2341 = exp(x = var_2335)[name = string("op_2341")]; + tensor var_2351_axes_0 = const()[name = string("op_2351_axes_0"), val = tensor([-1])]; + bool var_2351_keep_dims_0 = const()[name = string("op_2351_keep_dims_0"), val = bool(true)]; + tensor var_2351 = reduce_sum(axes = var_2351_axes_0, keep_dims = var_2351_keep_dims_0, x = var_2341)[name = string("op_2351")]; + tensor var_2357_cast_fp16 = real_div(x = var_2341, y = var_2351)[name = string("op_2357_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_188")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_2357_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_2368 = const()[name = string("op_2368"), val = tensor([0, 2, 1, 3])]; + tensor var_2375 = const()[name = string("op_2375"), val = tensor([1, 3, -1])]; + tensor var_2369_cast_fp16 = transpose(perm = var_2368, x = attn_output_13_cast_fp16)[name = string("transpose_187")]; + tensor attn_output_15_cast_fp16 = 
reshape(shape = var_2375, x = var_2369_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2380 = const()[name = string("op_2380"), val = tensor([0, 2, 1])]; + string var_2396_pad_type_0 = const()[name = string("op_2396_pad_type_0"), val = string("valid")]; + int32 var_2396_groups_0 = const()[name = string("op_2396_groups_0"), val = int32(1)]; + tensor var_2396_strides_0 = const()[name = string("op_2396_strides_0"), val = tensor([1])]; + tensor var_2396_pad_0 = const()[name = string("op_2396_pad_0"), val = tensor([0, 0])]; + tensor var_2396_dilations_0 = const()[name = string("op_2396_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537206976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539828480))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2381_cast_fp16 = transpose(perm = var_2380, x = attn_output_15_cast_fp16)[name = string("transpose_186")]; + tensor var_2396_cast_fp16 = conv(dilations = var_2396_dilations_0, groups = var_2396_groups_0, pad = var_2396_pad_0, pad_type = var_2396_pad_type_0, strides = var_2396_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2381_cast_fp16)[name = string("op_2396_cast_fp16")]; + tensor var_2400 = const()[name = string("op_2400"), val = tensor([0, 2, 1])]; + int32 var_2406 = const()[name = string("op_2406"), val = int32(-1)]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_51_cast_fp16 = transpose(perm = var_2400, x = var_2396_cast_fp16)[name = string("transpose_185")]; + tensor var_2408_cast_fp16 = mul(x = x_51_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2408_cast_fp16")]; + bool input_75_interleave_0 = const()[name = string("input_75_interleave_0"), val = 
bool(false)]; + tensor input_75_cast_fp16 = concat(axis = var_2406, interleave = input_75_interleave_0, values = (x_51_cast_fp16, var_2408_cast_fp16))[name = string("input_75_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_2403_to_fp16 = const()[name = string("op_2403_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2403_to_fp16, x = input_75_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_2413_split_sizes_0 = const()[name = string("op_2413_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2413_axis_0 = const()[name = string("op_2413_axis_0"), val = int32(-1)]; + tensor var_2413_cast_fp16_0, tensor var_2413_cast_fp16_1 = split(axis = var_2413_axis_0, split_sizes = var_2413_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2413_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539831104)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_2413_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_39_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_2422 = const()[name = string("op_2422"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2424_cast_fp16 = mul(x = x_53_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2424_cast_fp16")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77_cast_fp16 = concat(axis = var_2422, interleave = input_77_interleave_0, values = (x_53_cast_fp16, 
var_2424_cast_fp16))[name = string("input_77_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_2419_to_fp16 = const()[name = string("op_2419_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2419_to_fp16, x = input_77_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_2429_split_sizes_0 = const()[name = string("op_2429_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2429_axis_0 = const()[name = string("op_2429_axis_0"), val = int32(-1)]; + tensor var_2429_cast_fp16_0, tensor var_2429_cast_fp16_1 = split(axis = var_2429_axis_0, split_sizes = var_2429_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2429_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539836288)))]; + tensor h_15_cast_fp16 = mul(x = var_2429_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_2440 = const()[name = string("op_2440"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_2441 = transpose(perm = var_2440, x = h_15_cast_fp16)[name = string("transpose_184")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_2441)[name = string("input_79")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = 
string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_79)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_81 = mul(x = gate_11, y = up_5)[name = string("input_81")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_81)[name = 
string("mlp_out_5")]; + tensor var_2481_axes_0 = const()[name = string("op_2481_axes_0"), val = tensor([2])]; + tensor var_2481 = squeeze(axes = var_2481_axes_0, x = mlp_out_5)[name = string("op_2481")]; + tensor var_2485 = const()[name = string("op_2485"), val = tensor([0, 2, 1])]; + int32 var_2491 = const()[name = string("op_2491"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor x_55 = transpose(perm = var_2485, x = var_2481)[name = string("transpose_183")]; + tensor var_2493 = mul(x = x_55, y = const_33_promoted)[name = string("op_2493")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83 = concat(axis = var_2491, interleave = input_83_interleave_0, values = (x_55, var_2493))[name = string("input_83")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_2488_to_fp16 = const()[name = string("op_2488_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2488_to_fp16, x = input_83)[name = string("normed_77_cast_fp16")]; + tensor var_2498_split_sizes_0 = const()[name = string("op_2498_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2498_axis_0 = const()[name = string("op_2498_axis_0"), val = int32(-1)]; + tensor var_2498_0, tensor var_2498_1 = split(axis = var_2498_axis_0, split_sizes = var_2498_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2498")]; + tensor hidden_states_23 = mul(x = var_2498_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 3584])]; + tensor per_layer_slice_5_end_0 = const()[name = 
string("per_layer_slice_5_end_0"), val = tensor([1, 3, 3840])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_2526 = const()[name = string("op_2526"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_2527 = transpose(perm = var_2526, x = hidden_states_25_cast_fp16)[name = string("transpose_182")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_2527)[name = string("input_85")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_85)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_2546 = const()[name = string("op_2546"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_2547_cast_fp16 = 
transpose(perm = var_2546, x = per_layer_slice_5_cast_fp16)[name = string("transpose_181")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_2547_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_87_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539841472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540169216))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_87_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_2563_axes_0 = const()[name = string("op_2563_axes_0"), val = tensor([2])]; + tensor var_2563_cast_fp16 = squeeze(axes = var_2563_axes_0, x = gated_17_cast_fp16)[name = string("op_2563_cast_fp16")]; + tensor var_2567 = const()[name = string("op_2567"), val = tensor([0, 2, 1])]; + int32 var_2573 = const()[name = string("op_2573"), val = int32(-1)]; + fp16 const_34_promoted_to_fp16 = const()[name = 
string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_57_cast_fp16 = transpose(perm = var_2567, x = var_2563_cast_fp16)[name = string("transpose_180")]; + tensor var_2575_cast_fp16 = mul(x = x_57_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2575_cast_fp16")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89_cast_fp16 = concat(axis = var_2573, interleave = input_89_interleave_0, values = (x_57_cast_fp16, var_2575_cast_fp16))[name = string("input_89_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_2570_to_fp16 = const()[name = string("op_2570_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2570_to_fp16, x = input_89_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_2580_split_sizes_0 = const()[name = string("op_2580_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2580_axis_0 = const()[name = string("op_2580_axis_0"), val = int32(-1)]; + tensor var_2580_cast_fp16_0, tensor var_2580_cast_fp16_1 = split(axis = var_2580_axis_0, split_sizes = var_2580_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2580_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540171840)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_2580_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = tensor([0x1.58p-1])]; + 
tensor x_59_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_35_promoted_to_fp16)[name = string("x_59_cast_fp16")]; + int32 var_2595 = const()[name = string("op_2595"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2597_cast_fp16 = mul(x = x_59_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2597_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_2595, interleave = input_91_interleave_0, values = (x_59_cast_fp16, var_2597_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_2592_to_fp16 = const()[name = string("op_2592_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2592_to_fp16, x = input_91_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_2602_split_sizes_0 = const()[name = string("op_2602_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2602_axis_0 = const()[name = string("op_2602_axis_0"), val = int32(-1)]; + tensor var_2602_cast_fp16_0, tensor var_2602_cast_fp16_1 = split(axis = var_2602_axis_0, split_sizes = var_2602_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2602_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540177024)))]; + tensor h_19_cast_fp16 = mul(x = var_2602_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_2608 = const()[name = string("op_2608"), val = tensor([0, 2, 1])]; + tensor var_2611_axes_0 = const()[name = string("op_2611_axes_0"), val = tensor([2])]; + tensor 
var_2609_cast_fp16 = transpose(perm = var_2608, x = h_19_cast_fp16)[name = string("transpose_179")]; + tensor var_2611_cast_fp16 = expand_dims(axes = var_2611_axes_0, x = var_2609_cast_fp16)[name = string("op_2611_cast_fp16")]; + string q_37_pad_type_0 = const()[name = string("q_37_pad_type_0"), val = string("valid")]; + tensor q_37_strides_0 = const()[name = string("q_37_strides_0"), val = tensor([1, 1])]; + tensor q_37_pad_0 = const()[name = string("q_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_37_dilations_0 = const()[name = string("q_37_dilations_0"), val = tensor([1, 1])]; + int32 q_37_groups_0 = const()[name = string("q_37_groups_0"), val = int32(1)]; + tensor q_37 = conv(dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_2611_cast_fp16)[name = string("q_37")]; + tensor var_2632 = const()[name = string("op_2632"), val = tensor([1, 8, 256, 3])]; + tensor var_2633 = reshape(shape = var_2632, x = q_37)[name = string("op_2633")]; + tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2656 = const()[name = string("op_2656"), val = tensor([3, 8, 256])]; + tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = var_2633)[name = string("transpose_178")]; + tensor x_61 = reshape(shape = var_2656, x = transpose_57)[name = string("x_61")]; + int32 var_2662 = const()[name = string("op_2662"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor var_2664 = mul(x = x_61, y = const_37_promoted)[name = string("op_2664")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95 = concat(axis = var_2662, interleave = input_95_interleave_0, values = (x_61, var_2664))[name = string("input_95")]; + tensor normed_89_axes_0 = const()[name = 
string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_2659_to_fp16 = const()[name = string("op_2659_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2659_to_fp16, x = input_95)[name = string("normed_89_cast_fp16")]; + tensor var_2669_split_sizes_0 = const()[name = string("op_2669_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2669_axis_0 = const()[name = string("op_2669_axis_0"), val = int32(-1)]; + tensor var_2669_0, tensor var_2669_1 = split(axis = var_2669_axis_0, split_sizes = var_2669_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2669")]; + tensor q_41 = mul(x = var_2669_0, y = layers_3_self_attn_q_norm_weight)[name = string("q_41")]; + tensor var_2676 = const()[name = string("op_2676"), val = tensor([1, 3, 8, 256])]; + tensor var_2677 = reshape(shape = var_2676, x = q_41)[name = string("op_2677")]; + tensor var_2682 = const()[name = string("op_2682"), val = tensor([0, 2, 1, 3])]; + tensor q_43 = transpose(perm = var_2682, x = var_2677)[name = string("transpose_177")]; + tensor var_2684_cast_fp16 = mul(x = q_43, y = cos_s)[name = string("op_2684_cast_fp16")]; + tensor var_2685_split_sizes_0 = const()[name = string("op_2685_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2685_axis_0 = const()[name = string("op_2685_axis_0"), val = int32(-1)]; + tensor var_2685_0, tensor var_2685_1 = split(axis = var_2685_axis_0, split_sizes = var_2685_split_sizes_0, x = q_43)[name = string("op_2685")]; + fp16 const_38_promoted = const()[name = string("const_38_promoted"), val = fp16(-0x1p+0)]; + tensor var_2687 = mul(x = var_2685_1, y = const_38_promoted)[name = string("op_2687")]; + int32 var_2689 = const()[name = string("op_2689"), val = int32(-1)]; + bool var_2690_interleave_0 = const()[name = string("op_2690_interleave_0"), val = bool(false)]; + tensor var_2690 = concat(axis = var_2689, interleave = var_2690_interleave_0, values = (var_2687, var_2685_0))[name = 
string("op_2690")]; + tensor var_2691_cast_fp16 = mul(x = var_2690, y = sin_s)[name = string("op_2691_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_2684_cast_fp16, y = var_2691_cast_fp16)[name = string("q_47_cast_fp16")]; + string k_19_pad_type_0 = const()[name = string("k_19_pad_type_0"), val = string("valid")]; + tensor k_19_strides_0 = const()[name = string("k_19_strides_0"), val = tensor([1, 1])]; + tensor k_19_pad_0 = const()[name = string("k_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_19_dilations_0 = const()[name = string("k_19_dilations_0"), val = tensor([1, 1])]; + int32 k_19_groups_0 = const()[name = string("k_19_groups_0"), val = int32(1)]; + tensor k_19 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = layers_3_self_attn_k_proj_weight_palettized, x = var_2611_cast_fp16)[name = string("k_19")]; + tensor var_2709 = const()[name = string("op_2709"), val = tensor([1, 2, 256, 3])]; + tensor var_2710 = reshape(shape = var_2709, x = k_19)[name = string("op_2710")]; + tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_7_pad_type_0 = const()[name = string("v_7_pad_type_0"), val = string("valid")]; + tensor v_7_strides_0 = const()[name = string("v_7_strides_0"), val = tensor([1, 1])]; + tensor v_7_pad_0 = const()[name = string("v_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_7_dilations_0 = const()[name = string("v_7_dilations_0"), val = tensor([1, 1])]; + int32 v_7_groups_0 = const()[name = string("v_7_groups_0"), val = int32(1)]; + tensor v_7 = conv(dilations = v_7_dilations_0, groups = v_7_groups_0, pad = v_7_pad_0, pad_type = v_7_pad_type_0, strides = v_7_strides_0, weight = layers_3_self_attn_v_proj_weight_palettized, x = var_2611_cast_fp16)[name = string("v_7")]; + tensor var_2737 = const()[name = string("op_2737"), val = tensor([1, 2, 256, 3])]; + tensor var_2738 = reshape(shape = 
var_2737, x = v_7)[name = string("op_2738")]; + tensor var_2743 = const()[name = string("op_2743"), val = tensor([0, 1, 3, 2])]; + tensor var_2761 = const()[name = string("op_2761"), val = tensor([3, 2, 256])]; + tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = var_2710)[name = string("transpose_176")]; + tensor x_63 = reshape(shape = var_2761, x = transpose_58)[name = string("x_63")]; + int32 var_2767 = const()[name = string("op_2767"), val = int32(-1)]; + fp16 const_39_promoted = const()[name = string("const_39_promoted"), val = fp16(-0x1p+0)]; + tensor var_2769 = mul(x = x_63, y = const_39_promoted)[name = string("op_2769")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97 = concat(axis = var_2767, interleave = input_97_interleave_0, values = (x_63, var_2769))[name = string("input_97")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_2764_to_fp16, x = input_97)[name = string("normed_93_cast_fp16")]; + tensor var_2774_split_sizes_0 = const()[name = string("op_2774_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2774_axis_0 = const()[name = string("op_2774_axis_0"), val = int32(-1)]; + tensor var_2774_0, tensor var_2774_1 = split(axis = var_2774_axis_0, split_sizes = var_2774_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_2774")]; + tensor k_23 = mul(x = var_2774_0, y = layers_3_self_attn_k_norm_weight)[name = string("k_23")]; + tensor var_2781 = const()[name = string("op_2781"), val = tensor([1, 3, 2, 256])]; + tensor var_2782 = reshape(shape = var_2781, x = k_23)[name = string("op_2782")]; + tensor var_2787 = const()[name = string("op_2787"), val = tensor([0, 2, 1, 3])]; + fp16 var_2789_promoted = const()[name = string("op_2789_promoted"), val = 
fp16(0x1p+1)]; + tensor var_2744 = transpose(perm = var_2743, x = var_2738)[name = string("transpose_175")]; + tensor var_2790 = pow(x = var_2744, y = var_2789_promoted)[name = string("op_2790")]; + tensor var_2795_axes_0 = const()[name = string("op_2795_axes_0"), val = tensor([-1])]; + bool var_2795_keep_dims_0 = const()[name = string("op_2795_keep_dims_0"), val = bool(true)]; + tensor var_2795 = reduce_mean(axes = var_2795_axes_0, keep_dims = var_2795_keep_dims_0, x = var_2790)[name = string("op_2795")]; + fp16 var_2797_to_fp16 = const()[name = string("op_2797_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_7_cast_fp16 = add(x = var_2795, y = var_2797_to_fp16)[name = string("mean_sq_7_cast_fp16")]; + fp32 var_2799_epsilon_0 = const()[name = string("op_2799_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2799_cast_fp16 = rsqrt(epsilon = var_2799_epsilon_0, x = mean_sq_7_cast_fp16)[name = string("op_2799_cast_fp16")]; + tensor input_101_cast_fp16 = mul(x = var_2744, y = var_2799_cast_fp16)[name = string("input_101_cast_fp16")]; + tensor q_45 = transpose(perm = var_2787, x = var_2782)[name = string("transpose_174")]; + tensor var_2801_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_2801_cast_fp16")]; + tensor var_2802_split_sizes_0 = const()[name = string("op_2802_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2802_axis_0 = const()[name = string("op_2802_axis_0"), val = int32(-1)]; + tensor var_2802_0, tensor var_2802_1 = split(axis = var_2802_axis_0, split_sizes = var_2802_split_sizes_0, x = q_45)[name = string("op_2802")]; + fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)]; + tensor var_2804 = mul(x = var_2802_1, y = const_40_promoted)[name = string("op_2804")]; + int32 var_2806 = const()[name = string("op_2806"), val = int32(-1)]; + bool var_2807_interleave_0 = const()[name = string("op_2807_interleave_0"), val = bool(false)]; + tensor var_2807 = concat(axis = var_2806, interleave = 
var_2807_interleave_0, values = (var_2804, var_2802_0))[name = string("op_2807")]; + tensor var_2808_cast_fp16 = mul(x = var_2807, y = sin_s)[name = string("op_2808_cast_fp16")]; + tensor input_99_cast_fp16 = add(x = var_2801_cast_fp16, y = var_2808_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; + fp16 const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_7_cast_fp16 = pad(constant_val = const_41_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = input_99_cast_fp16)[name = string("k_padded_7_cast_fp16")]; + tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; + fp16 const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_7_cast_fp16 = pad(constant_val = const_42_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = input_101_cast_fp16)[name = string("v_padded_7_cast_fp16")]; + tensor slot_k_7_begin_0 = const()[name = string("slot_k_7_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor slot_k_7_end_0 = const()[name = string("slot_k_7_end_0"), val = tensor([4, 2, 512, 512])]; + tensor slot_k_7_end_mask_0 = const()[name = string("slot_k_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_7_cast_fp16 = slice_by_index(begin = slot_k_7_begin_0, end = slot_k_7_end_0, end_mask = slot_k_7_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("slot_k_7_cast_fp16")]; + tensor slot_v_7_begin_0 = const()[name = string("slot_v_7_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor slot_v_7_end_0 = const()[name = string("slot_v_7_end_0"), val = tensor([4, 2, 512, 512])]; + tensor 
slot_v_7_end_mask_0 = const()[name = string("slot_v_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_7_cast_fp16 = slice_by_index(begin = slot_v_7_begin_0, end = slot_v_7_end_0, end_mask = slot_v_7_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("slot_v_7_cast_fp16")]; + tensor var_2847_begin_0 = const()[name = string("op_2847_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2847_end_0 = const()[name = string("op_2847_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2847_end_mask_0 = const()[name = string("op_2847_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2847_cast_fp16 = slice_by_index(begin = var_2847_begin_0, end = var_2847_end_0, end_mask = var_2847_end_mask_0, x = slot_k_7_cast_fp16)[name = string("op_2847_cast_fp16")]; + int32 var_2854 = const()[name = string("op_2854"), val = int32(2)]; + bool new_k_7_interleave_0 = const()[name = string("new_k_7_interleave_0"), val = bool(false)]; + tensor new_k_7_cast_fp16 = concat(axis = var_2854, interleave = new_k_7_interleave_0, values = (var_2847_cast_fp16, k_padded_7_cast_fp16))[name = string("new_k_7_cast_fp16")]; + tensor var_2870_begin_0 = const()[name = string("op_2870_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_2870_end_0 = const()[name = string("op_2870_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2870_end_mask_0 = const()[name = string("op_2870_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2870_cast_fp16 = slice_by_index(begin = var_2870_begin_0, end = var_2870_end_0, end_mask = var_2870_end_mask_0, x = slot_v_7_cast_fp16)[name = string("op_2870_cast_fp16")]; + int32 var_2877 = const()[name = string("op_2877"), val = int32(2)]; + bool new_v_7_interleave_0 = const()[name = string("new_v_7_interleave_0"), val = bool(false)]; + tensor new_v_7_cast_fp16 = concat(axis = var_2877, interleave = new_v_7_interleave_0, values = (var_2870_cast_fp16, v_padded_7_cast_fp16))[name = string("new_v_7_cast_fp16")]; + 
tensor var_2883_begin_0 = const()[name = string("op_2883_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2883_end_0 = const()[name = string("op_2883_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2883_end_mask_0 = const()[name = string("op_2883_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2883_cast_fp16 = slice_by_index(begin = var_2883_begin_0, end = var_2883_end_0, end_mask = var_2883_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2883_cast_fp16")]; + tensor var_2888_begin_0 = const()[name = string("op_2888_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_2888_end_0 = const()[name = string("op_2888_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2888_end_mask_0 = const()[name = string("op_2888_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2888_cast_fp16 = slice_by_index(begin = var_2888_begin_0, end = var_2888_end_0, end_mask = var_2888_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2888_cast_fp16")]; + int32 var_2890 = const()[name = string("op_2890"), val = int32(0)]; + bool K_sliding_out_7_interleave_0 = const()[name = string("K_sliding_out_7_interleave_0"), val = bool(false)]; + tensor K_sliding_out_7_cast_fp16 = concat(axis = var_2890, interleave = K_sliding_out_7_interleave_0, values = (var_2883_cast_fp16, new_k_7_cast_fp16, var_2888_cast_fp16))[name = string("K_sliding_out_7_cast_fp16")]; + tensor var_2896_begin_0 = const()[name = string("op_2896_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2896_end_0 = const()[name = string("op_2896_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2896_end_mask_0 = const()[name = string("op_2896_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2896_cast_fp16 = slice_by_index(begin = var_2896_begin_0, end = var_2896_end_0, end_mask = var_2896_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2896_cast_fp16")]; + tensor var_2901_begin_0 = const()[name = string("op_2901_begin_0"), 
val = tensor([4, 0, 0, 0])]; + tensor var_2901_end_0 = const()[name = string("op_2901_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_2901_end_mask_0 = const()[name = string("op_2901_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2901_cast_fp16 = slice_by_index(begin = var_2901_begin_0, end = var_2901_end_0, end_mask = var_2901_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2901_cast_fp16")]; + int32 var_2903 = const()[name = string("op_2903"), val = int32(0)]; + bool V_sliding_out_7_interleave_0 = const()[name = string("V_sliding_out_7_interleave_0"), val = bool(false)]; + tensor V_sliding_out_7_cast_fp16 = concat(axis = var_2903, interleave = V_sliding_out_7_interleave_0, values = (var_2896_cast_fp16, new_v_7_cast_fp16, var_2901_cast_fp16))[name = string("V_sliding_out_7_cast_fp16")]; + tensor var_2909_begin_0 = const()[name = string("op_2909_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2909_end_0 = const()[name = string("op_2909_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2909_end_mask_0 = const()[name = string("op_2909_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2909_cast_fp16 = slice_by_index(begin = var_2909_begin_0, end = var_2909_end_0, end_mask = var_2909_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("op_2909_cast_fp16")]; + tensor K_for_attn_7_begin_0 = const()[name = string("K_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_7_end_0 = const()[name = string("K_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_7_end_mask_0 = const()[name = string("K_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_7_cast_fp16 = slice_by_index(begin = K_for_attn_7_begin_0, end = K_for_attn_7_end_0, end_mask = K_for_attn_7_end_mask_0, x = var_2909_cast_fp16)[name = string("K_for_attn_7_cast_fp16")]; + tensor var_2919_begin_0 = const()[name = string("op_2919_begin_0"), val = tensor([3, 0, 0, 0])]; 
+ tensor var_2919_end_0 = const()[name = string("op_2919_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2919_end_mask_0 = const()[name = string("op_2919_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2919_cast_fp16 = slice_by_index(begin = var_2919_begin_0, end = var_2919_end_0, end_mask = var_2919_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("op_2919_cast_fp16")]; + tensor V_for_attn_7_begin_0 = const()[name = string("V_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_7_end_0 = const()[name = string("V_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_7_end_mask_0 = const()[name = string("V_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_7_cast_fp16 = slice_by_index(begin = V_for_attn_7_begin_0, end = V_for_attn_7_end_0, end_mask = V_for_attn_7_end_mask_0, x = var_2919_cast_fp16)[name = string("V_for_attn_7_cast_fp16")]; + tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_for_attn_7_cast_fp16)[name = string("transpose_173")]; + tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_12, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_172")]; + tensor reshape_13_cast_fp16 = 
reshape(shape = concat_13, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; + tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_for_attn_7_cast_fp16)[name = string("transpose_171")]; + tensor tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_14, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_170")]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_15, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor V_expanded_7_perm_0 = const()[name = string("V_expanded_7_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_169")]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_47_cast_fp16, y = transpose_59_cast_fp16)[name = 
string("attn_weights_13_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_3")]; + tensor var_2954 = sub(x = x_67_cast_fp16, y = reduce_max_3)[name = string("op_2954")]; + tensor var_2960 = exp(x = var_2954)[name = string("op_2960")]; + tensor var_2970_axes_0 = const()[name = string("op_2970_axes_0"), val = tensor([-1])]; + bool var_2970_keep_dims_0 = const()[name = string("op_2970_keep_dims_0"), val = bool(true)]; + tensor var_2970 = reduce_sum(axes = var_2970_axes_0, keep_dims = var_2970_keep_dims_0, x = var_2960)[name = string("op_2970")]; + tensor var_2976_cast_fp16 = real_div(x = var_2960, y = var_2970)[name = string("op_2976_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor V_expanded_7_cast_fp16 = transpose(perm = V_expanded_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_168")]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_2976_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_2987 = const()[name = string("op_2987"), val = tensor([0, 2, 1, 3])]; + tensor var_2994 = const()[name = string("op_2994"), val = tensor([1, 3, -1])]; + tensor var_2988_cast_fp16 = transpose(perm = var_2987, x = attn_output_19_cast_fp16)[name = string("transpose_167")]; + tensor attn_output_21_cast_fp16 = 
reshape(shape = var_2994, x = var_2988_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_2999 = const()[name = string("op_2999"), val = tensor([0, 2, 1])]; + string var_3015_pad_type_0 = const()[name = string("op_3015_pad_type_0"), val = string("valid")]; + int32 var_3015_groups_0 = const()[name = string("op_3015_groups_0"), val = int32(1)]; + tensor var_3015_strides_0 = const()[name = string("op_3015_strides_0"), val = tensor([1])]; + tensor var_3015_pad_0 = const()[name = string("op_3015_pad_0"), val = tensor([0, 0])]; + tensor var_3015_dilations_0 = const()[name = string("op_3015_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540182208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542803712))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3000_cast_fp16 = transpose(perm = var_2999, x = attn_output_21_cast_fp16)[name = string("transpose_166")]; + tensor var_3015_cast_fp16 = conv(dilations = var_3015_dilations_0, groups = var_3015_groups_0, pad = var_3015_pad_0, pad_type = var_3015_pad_type_0, strides = var_3015_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3000_cast_fp16)[name = string("op_3015_cast_fp16")]; + tensor var_3019 = const()[name = string("op_3019"), val = tensor([0, 2, 1])]; + int32 var_3025 = const()[name = string("op_3025"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_3019, x = var_3015_cast_fp16)[name = string("transpose_165")]; + tensor var_3027_cast_fp16 = mul(x = x_71_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3027_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = 
bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_3025, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_3027_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_3022_to_fp16, x = input_105_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_3032_split_sizes_0 = const()[name = string("op_3032_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3032_axis_0 = const()[name = string("op_3032_axis_0"), val = int32(-1)]; + tensor var_3032_cast_fp16_0, tensor var_3032_cast_fp16_1 = split(axis = var_3032_axis_0, split_sizes = var_3032_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_3032_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542806336)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_3032_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_59_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_3041 = const()[name = string("op_3041"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3043_cast_fp16 = mul(x = x_73_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3043_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_3041, interleave = input_107_interleave_0, values = (x_73_cast_fp16, 
var_3043_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_3038_to_fp16 = const()[name = string("op_3038_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_3038_to_fp16, x = input_107_cast_fp16)[name = string("normed_101_cast_fp16")]; + tensor var_3048_split_sizes_0 = const()[name = string("op_3048_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3048_axis_0 = const()[name = string("op_3048_axis_0"), val = int32(-1)]; + tensor var_3048_cast_fp16_0, tensor var_3048_cast_fp16_1 = split(axis = var_3048_axis_0, split_sizes = var_3048_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_3048_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542811520)))]; + tensor h_21_cast_fp16 = mul(x = var_3048_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_3059 = const()[name = string("op_3059"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_3060 = transpose(perm = var_3059, x = h_21_cast_fp16)[name = string("transpose_164")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_3060)[name = string("input_109")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = 
const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_111 = mul(x = gate_15, y = up_7)[name = string("input_111")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = 
input_111)[name = string("mlp_out_7")]; + tensor var_3100_axes_0 = const()[name = string("op_3100_axes_0"), val = tensor([2])]; + tensor var_3100 = squeeze(axes = var_3100_axes_0, x = mlp_out_7)[name = string("op_3100")]; + tensor var_3104 = const()[name = string("op_3104"), val = tensor([0, 2, 1])]; + int32 var_3110 = const()[name = string("op_3110"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_3104, x = var_3100)[name = string("transpose_163")]; + tensor var_3112 = mul(x = x_75, y = const_45_promoted)[name = string("op_3112")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_3110, interleave = input_113_interleave_0, values = (x_75, var_3112))[name = string("input_113")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_3107_to_fp16 = const()[name = string("op_3107_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_3107_to_fp16, x = input_113)[name = string("normed_105_cast_fp16")]; + tensor var_3117_split_sizes_0 = const()[name = string("op_3117_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3117_axis_0 = const()[name = string("op_3117_axis_0"), val = int32(-1)]; + tensor var_3117_0, tensor var_3117_1 = split(axis = var_3117_axis_0, split_sizes = var_3117_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_3117")]; + tensor hidden_states_33 = mul(x = var_3117_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 3840])]; + tensor per_layer_slice_7_end_0 = 
const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 3, 4096])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_3145 = const()[name = string("op_3145"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_3146 = transpose(perm = var_3145, x = hidden_states_35_cast_fp16)[name = string("transpose_162")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_3146)[name = string("input_115")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_3165 = const()[name = string("op_3165"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor 
var_3166_cast_fp16 = transpose(perm = var_3165, x = per_layer_slice_7_cast_fp16)[name = string("transpose_161")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_3166_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(542816704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543144448))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_3182_axes_0 = const()[name = string("op_3182_axes_0"), val = tensor([2])]; + tensor var_3182_cast_fp16 = squeeze(axes = var_3182_axes_0, x = gated_23_cast_fp16)[name = string("op_3182_cast_fp16")]; + tensor var_3186 = const()[name = string("op_3186"), val = tensor([0, 2, 1])]; + int32 var_3192 = const()[name = string("op_3192"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 
= const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_3186, x = var_3182_cast_fp16)[name = string("transpose_160")]; + tensor var_3194_cast_fp16 = mul(x = x_77_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_3194_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_3192, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_3194_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_3189_to_fp16 = const()[name = string("op_3189_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_3189_to_fp16, x = input_119_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_3199_split_sizes_0 = const()[name = string("op_3199_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3199_axis_0 = const()[name = string("op_3199_axis_0"), val = int32(-1)]; + tensor var_3199_cast_fp16_0, tensor var_3199_cast_fp16_1 = split(axis = var_3199_axis_0, split_sizes = var_3199_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_3199_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543147072)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_3199_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), 
val = tensor([0x1.14p-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_3214 = const()[name = string("op_3214"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3216_cast_fp16 = mul(x = x_79_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_3216_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_3214, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_3216_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_3211_to_fp16 = const()[name = string("op_3211_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_3211_to_fp16, x = input_121_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor var_3221_split_sizes_0 = const()[name = string("op_3221_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3221_axis_0 = const()[name = string("op_3221_axis_0"), val = int32(-1)]; + tensor var_3221_cast_fp16_0, tensor var_3221_cast_fp16_1 = split(axis = var_3221_axis_0, split_sizes = var_3221_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_3221_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543152256)))]; + tensor h_25_cast_fp16 = mul(x = var_3221_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_3227 = const()[name = string("op_3227"), val = tensor([0, 2, 1])]; + tensor var_3230_axes_0 = const()[name = 
string("op_3230_axes_0"), val = tensor([2])]; + tensor var_3228_cast_fp16 = transpose(perm = var_3227, x = h_25_cast_fp16)[name = string("transpose_159")]; + tensor var_3230_cast_fp16 = expand_dims(axes = var_3230_axes_0, x = var_3228_cast_fp16)[name = string("op_3230_cast_fp16")]; + string q_49_pad_type_0 = const()[name = string("q_49_pad_type_0"), val = string("valid")]; + tensor q_49_strides_0 = const()[name = string("q_49_strides_0"), val = tensor([1, 1])]; + tensor q_49_pad_0 = const()[name = string("q_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_49_dilations_0 = const()[name = string("q_49_dilations_0"), val = tensor([1, 1])]; + int32 q_49_groups_0 = const()[name = string("q_49_groups_0"), val = int32(1)]; + tensor q_49 = conv(dilations = q_49_dilations_0, groups = q_49_groups_0, pad = q_49_pad_0, pad_type = q_49_pad_type_0, strides = q_49_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_3230_cast_fp16)[name = string("q_49")]; + tensor var_3251 = const()[name = string("op_3251"), val = tensor([1, 8, 256, 3])]; + tensor var_3252 = reshape(shape = var_3251, x = q_49)[name = string("op_3252")]; + tensor transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3275 = const()[name = string("op_3275"), val = tensor([3, 8, 256])]; + tensor transpose_60 = transpose(perm = transpose_60_perm_0, x = var_3252)[name = string("transpose_158")]; + tensor x_81 = reshape(shape = var_3275, x = transpose_60)[name = string("x_81")]; + int32 var_3281 = const()[name = string("op_3281"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_3283 = mul(x = x_81, y = const_49_promoted)[name = string("op_3283")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_3281, interleave = input_125_interleave_0, values = (x_81, var_3283))[name = 
string("input_125")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_3278_to_fp16 = const()[name = string("op_3278_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_3278_to_fp16, x = input_125)[name = string("normed_117_cast_fp16")]; + tensor var_3288_split_sizes_0 = const()[name = string("op_3288_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3288_axis_0 = const()[name = string("op_3288_axis_0"), val = int32(-1)]; + tensor var_3288_0, tensor var_3288_1 = split(axis = var_3288_axis_0, split_sizes = var_3288_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_3288")]; + tensor q_53 = mul(x = var_3288_0, y = layers_4_self_attn_q_norm_weight)[name = string("q_53")]; + tensor var_3295 = const()[name = string("op_3295"), val = tensor([1, 3, 8, 256])]; + tensor var_3296 = reshape(shape = var_3295, x = q_53)[name = string("op_3296")]; + tensor var_3301 = const()[name = string("op_3301"), val = tensor([0, 2, 1, 3])]; + tensor q_55 = transpose(perm = var_3301, x = var_3296)[name = string("transpose_157")]; + tensor var_3303_cast_fp16 = mul(x = q_55, y = cos_s)[name = string("op_3303_cast_fp16")]; + tensor var_3304_split_sizes_0 = const()[name = string("op_3304_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3304_axis_0 = const()[name = string("op_3304_axis_0"), val = int32(-1)]; + tensor var_3304_0, tensor var_3304_1 = split(axis = var_3304_axis_0, split_sizes = var_3304_split_sizes_0, x = q_55)[name = string("op_3304")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_3306 = mul(x = var_3304_1, y = const_50_promoted)[name = string("op_3306")]; + int32 var_3308 = const()[name = string("op_3308"), val = int32(-1)]; + bool var_3309_interleave_0 = const()[name = string("op_3309_interleave_0"), val = bool(false)]; + tensor var_3309 = concat(axis = var_3308, interleave = 
var_3309_interleave_0, values = (var_3306, var_3304_0))[name = string("op_3309")]; + tensor var_3310_cast_fp16 = mul(x = var_3309, y = sin_s)[name = string("op_3310_cast_fp16")]; + tensor q_59_cast_fp16 = add(x = var_3303_cast_fp16, y = var_3310_cast_fp16)[name = string("q_59_cast_fp16")]; + string k_25_pad_type_0 = const()[name = string("k_25_pad_type_0"), val = string("valid")]; + tensor k_25_strides_0 = const()[name = string("k_25_strides_0"), val = tensor([1, 1])]; + tensor k_25_pad_0 = const()[name = string("k_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_25_dilations_0 = const()[name = string("k_25_dilations_0"), val = tensor([1, 1])]; + int32 k_25_groups_0 = const()[name = string("k_25_groups_0"), val = int32(1)]; + tensor k_25 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = layers_4_self_attn_k_proj_weight_palettized, x = var_3230_cast_fp16)[name = string("k_25")]; + tensor var_3328 = const()[name = string("op_3328"), val = tensor([1, 2, 256, 3])]; + tensor var_3329 = reshape(shape = var_3328, x = k_25)[name = string("op_3329")]; + tensor transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_9_pad_type_0 = const()[name = string("v_9_pad_type_0"), val = string("valid")]; + tensor v_9_strides_0 = const()[name = string("v_9_strides_0"), val = tensor([1, 1])]; + tensor v_9_pad_0 = const()[name = string("v_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_9_dilations_0 = const()[name = string("v_9_dilations_0"), val = tensor([1, 1])]; + int32 v_9_groups_0 = const()[name = string("v_9_groups_0"), val = int32(1)]; + tensor v_9 = conv(dilations = v_9_dilations_0, groups = v_9_groups_0, pad = v_9_pad_0, pad_type = v_9_pad_type_0, strides = v_9_strides_0, weight = layers_4_self_attn_v_proj_weight_palettized, x = var_3230_cast_fp16)[name = string("v_9")]; + tensor var_3356 = const()[name = string("op_3356"), val = 
tensor([1, 2, 256, 3])]; + tensor var_3357 = reshape(shape = var_3356, x = v_9)[name = string("op_3357")]; + tensor var_3362 = const()[name = string("op_3362"), val = tensor([0, 1, 3, 2])]; + tensor var_3380 = const()[name = string("op_3380"), val = tensor([3, 2, 256])]; + tensor transpose_61 = transpose(perm = transpose_61_perm_0, x = var_3329)[name = string("transpose_156")]; + tensor x_83 = reshape(shape = var_3380, x = transpose_61)[name = string("x_83")]; + int32 var_3386 = const()[name = string("op_3386"), val = int32(-1)]; + fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; + tensor var_3388 = mul(x = x_83, y = const_51_promoted)[name = string("op_3388")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_3386, interleave = input_127_interleave_0, values = (x_83, var_3388))[name = string("input_127")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_3383_to_fp16, x = input_127)[name = string("normed_121_cast_fp16")]; + tensor var_3393_split_sizes_0 = const()[name = string("op_3393_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3393_axis_0 = const()[name = string("op_3393_axis_0"), val = int32(-1)]; + tensor var_3393_0, tensor var_3393_1 = split(axis = var_3393_axis_0, split_sizes = var_3393_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_3393")]; + tensor k_29 = mul(x = var_3393_0, y = layers_4_self_attn_k_norm_weight)[name = string("k_29")]; + tensor var_3400 = const()[name = string("op_3400"), val = tensor([1, 3, 2, 256])]; + tensor var_3401 = reshape(shape = var_3400, x = k_29)[name = string("op_3401")]; + tensor var_3406 = const()[name = string("op_3406"), val = tensor([0, 2, 1, 3])]; + 
fp16 var_3408_promoted = const()[name = string("op_3408_promoted"), val = fp16(0x1p+1)]; + tensor var_3363 = transpose(perm = var_3362, x = var_3357)[name = string("transpose_155")]; + tensor var_3409 = pow(x = var_3363, y = var_3408_promoted)[name = string("op_3409")]; + tensor var_3414_axes_0 = const()[name = string("op_3414_axes_0"), val = tensor([-1])]; + bool var_3414_keep_dims_0 = const()[name = string("op_3414_keep_dims_0"), val = bool(true)]; + tensor var_3414 = reduce_mean(axes = var_3414_axes_0, keep_dims = var_3414_keep_dims_0, x = var_3409)[name = string("op_3414")]; + fp16 var_3416_to_fp16 = const()[name = string("op_3416_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_9_cast_fp16 = add(x = var_3414, y = var_3416_to_fp16)[name = string("mean_sq_9_cast_fp16")]; + fp32 var_3418_epsilon_0 = const()[name = string("op_3418_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3418_cast_fp16 = rsqrt(epsilon = var_3418_epsilon_0, x = mean_sq_9_cast_fp16)[name = string("op_3418_cast_fp16")]; + tensor input_131_cast_fp16 = mul(x = var_3363, y = var_3418_cast_fp16)[name = string("input_131_cast_fp16")]; + tensor q_57 = transpose(perm = var_3406, x = var_3401)[name = string("transpose_154")]; + tensor var_3420_cast_fp16 = mul(x = q_57, y = cos_s)[name = string("op_3420_cast_fp16")]; + tensor var_3421_split_sizes_0 = const()[name = string("op_3421_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3421_axis_0 = const()[name = string("op_3421_axis_0"), val = int32(-1)]; + tensor var_3421_0, tensor var_3421_1 = split(axis = var_3421_axis_0, split_sizes = var_3421_split_sizes_0, x = q_57)[name = string("op_3421")]; + fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; + tensor var_3423 = mul(x = var_3421_1, y = const_52_promoted)[name = string("op_3423")]; + int32 var_3425 = const()[name = string("op_3425"), val = int32(-1)]; + bool var_3426_interleave_0 = const()[name = string("op_3426_interleave_0"), val = 
bool(false)]; + tensor var_3426 = concat(axis = var_3425, interleave = var_3426_interleave_0, values = (var_3423, var_3421_0))[name = string("op_3426")]; + tensor var_3427_cast_fp16 = mul(x = var_3426, y = sin_s)[name = string("op_3427_cast_fp16")]; + tensor input_129_cast_fp16 = add(x = var_3420_cast_fp16, y = var_3427_cast_fp16)[name = string("input_129_cast_fp16")]; + tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; + fp16 const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_9_cast_fp16 = pad(constant_val = const_53_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_129_cast_fp16)[name = string("k_padded_9_cast_fp16")]; + tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; + fp16 const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_9_cast_fp16 = pad(constant_val = const_54_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = input_131_cast_fp16)[name = string("v_padded_9_cast_fp16")]; + tensor slot_k_9_begin_0 = const()[name = string("slot_k_9_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor slot_k_9_end_0 = const()[name = string("slot_k_9_end_0"), val = tensor([5, 2, 512, 512])]; + tensor slot_k_9_end_mask_0 = const()[name = string("slot_k_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_9_cast_fp16 = slice_by_index(begin = slot_k_9_begin_0, end = slot_k_9_end_0, end_mask = slot_k_9_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("slot_k_9_cast_fp16")]; + tensor slot_v_9_begin_0 = const()[name = string("slot_v_9_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor slot_v_9_end_0 = const()[name 
= string("slot_v_9_end_0"), val = tensor([5, 2, 512, 512])]; + tensor slot_v_9_end_mask_0 = const()[name = string("slot_v_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_9_cast_fp16 = slice_by_index(begin = slot_v_9_begin_0, end = slot_v_9_end_0, end_mask = slot_v_9_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("slot_v_9_cast_fp16")]; + tensor var_3466_begin_0 = const()[name = string("op_3466_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_3466_end_0 = const()[name = string("op_3466_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3466_end_mask_0 = const()[name = string("op_3466_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3466_cast_fp16 = slice_by_index(begin = var_3466_begin_0, end = var_3466_end_0, end_mask = var_3466_end_mask_0, x = slot_k_9_cast_fp16)[name = string("op_3466_cast_fp16")]; + int32 var_3473 = const()[name = string("op_3473"), val = int32(2)]; + bool new_k_9_interleave_0 = const()[name = string("new_k_9_interleave_0"), val = bool(false)]; + tensor new_k_9_cast_fp16 = concat(axis = var_3473, interleave = new_k_9_interleave_0, values = (var_3466_cast_fp16, k_padded_9_cast_fp16))[name = string("new_k_9_cast_fp16")]; + tensor var_3489_begin_0 = const()[name = string("op_3489_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_3489_end_0 = const()[name = string("op_3489_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3489_end_mask_0 = const()[name = string("op_3489_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3489_cast_fp16 = slice_by_index(begin = var_3489_begin_0, end = var_3489_end_0, end_mask = var_3489_end_mask_0, x = slot_v_9_cast_fp16)[name = string("op_3489_cast_fp16")]; + int32 var_3496 = const()[name = string("op_3496"), val = int32(2)]; + bool new_v_9_interleave_0 = const()[name = string("new_v_9_interleave_0"), val = bool(false)]; + tensor new_v_9_cast_fp16 = concat(axis = var_3496, interleave = new_v_9_interleave_0, values = 
(var_3489_cast_fp16, v_padded_9_cast_fp16))[name = string("new_v_9_cast_fp16")]; + tensor var_3502_begin_0 = const()[name = string("op_3502_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3502_end_0 = const()[name = string("op_3502_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_3502_end_mask_0 = const()[name = string("op_3502_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3502_cast_fp16 = slice_by_index(begin = var_3502_begin_0, end = var_3502_end_0, end_mask = var_3502_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("op_3502_cast_fp16")]; + tensor var_3507_begin_0 = const()[name = string("op_3507_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_3507_end_0 = const()[name = string("op_3507_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_3507_end_mask_0 = const()[name = string("op_3507_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3507_cast_fp16 = slice_by_index(begin = var_3507_begin_0, end = var_3507_end_0, end_mask = var_3507_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("op_3507_cast_fp16")]; + int32 var_3509 = const()[name = string("op_3509"), val = int32(0)]; + bool K_sliding_out_9_interleave_0 = const()[name = string("K_sliding_out_9_interleave_0"), val = bool(false)]; + tensor K_sliding_out_9_cast_fp16 = concat(axis = var_3509, interleave = K_sliding_out_9_interleave_0, values = (var_3502_cast_fp16, new_k_9_cast_fp16, var_3507_cast_fp16))[name = string("K_sliding_out_9_cast_fp16")]; + tensor var_3515_begin_0 = const()[name = string("op_3515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3515_end_0 = const()[name = string("op_3515_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_3515_end_mask_0 = const()[name = string("op_3515_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3515_cast_fp16 = slice_by_index(begin = var_3515_begin_0, end = var_3515_end_0, end_mask = var_3515_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = 
string("op_3515_cast_fp16")]; + tensor var_3520_begin_0 = const()[name = string("op_3520_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_3520_end_0 = const()[name = string("op_3520_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_3520_end_mask_0 = const()[name = string("op_3520_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3520_cast_fp16 = slice_by_index(begin = var_3520_begin_0, end = var_3520_end_0, end_mask = var_3520_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("op_3520_cast_fp16")]; + int32 var_3522 = const()[name = string("op_3522"), val = int32(0)]; + bool V_sliding_out_9_interleave_0 = const()[name = string("V_sliding_out_9_interleave_0"), val = bool(false)]; + tensor V_sliding_out_9_cast_fp16 = concat(axis = var_3522, interleave = V_sliding_out_9_interleave_0, values = (var_3515_cast_fp16, new_v_9_cast_fp16, var_3520_cast_fp16))[name = string("V_sliding_out_9_cast_fp16")]; + tensor var_3528_begin_0 = const()[name = string("op_3528_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3528_end_0 = const()[name = string("op_3528_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3528_end_mask_0 = const()[name = string("op_3528_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3528_cast_fp16 = slice_by_index(begin = var_3528_begin_0, end = var_3528_end_0, end_mask = var_3528_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("op_3528_cast_fp16")]; + tensor K_for_attn_9_begin_0 = const()[name = string("K_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_9_end_0 = const()[name = string("K_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_9_end_mask_0 = const()[name = string("K_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_9_cast_fp16 = slice_by_index(begin = K_for_attn_9_begin_0, end = K_for_attn_9_end_0, end_mask = K_for_attn_9_end_mask_0, x = var_3528_cast_fp16)[name = string("K_for_attn_9_cast_fp16")]; 
+ tensor var_3538_begin_0 = const()[name = string("op_3538_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3538_end_0 = const()[name = string("op_3538_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3538_end_mask_0 = const()[name = string("op_3538_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3538_cast_fp16 = slice_by_index(begin = var_3538_begin_0, end = var_3538_end_0, end_mask = var_3538_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("op_3538_cast_fp16")]; + tensor V_for_attn_9_begin_0 = const()[name = string("V_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_9_end_0 = const()[name = string("V_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_9_end_mask_0 = const()[name = string("V_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_9_cast_fp16 = slice_by_index(begin = V_for_attn_9_begin_0, end = V_for_attn_9_end_0, end_mask = V_for_attn_9_end_mask_0, x = var_3538_cast_fp16)[name = string("V_for_attn_9_cast_fp16")]; + tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_for_attn_9_cast_fp16)[name = string("transpose_153")]; + tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; + tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x 
= reshape_16_cast_fp16)[name = string("transpose_152")]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_for_attn_9_cast_fp16)[name = string("transpose_151")]; + tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_150")]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; + tensor V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor transpose_62_cast_fp16 = transpose(perm = transpose_62_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_149")]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = 
attn_weights_17_transpose_y_0, x = q_59_cast_fp16, y = transpose_62_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_87_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_87_cast_fp16)[name = string("reduce_max_4")]; + tensor var_3573 = sub(x = x_87_cast_fp16, y = reduce_max_4)[name = string("op_3573")]; + tensor var_3579 = exp(x = var_3573)[name = string("op_3579")]; + tensor var_3589_axes_0 = const()[name = string("op_3589_axes_0"), val = tensor([-1])]; + bool var_3589_keep_dims_0 = const()[name = string("op_3589_keep_dims_0"), val = bool(true)]; + tensor var_3589 = reduce_sum(axes = var_3589_axes_0, keep_dims = var_3589_keep_dims_0, x = var_3579)[name = string("op_3589")]; + tensor var_3595_cast_fp16 = real_div(x = var_3579, y = var_3589)[name = string("op_3595_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_148")]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_3595_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3606 = const()[name = string("op_3606"), val = tensor([0, 2, 1, 3])]; + tensor var_3613 = const()[name = string("op_3613"), val = tensor([1, 3, -1])]; + tensor var_3607_cast_fp16 = transpose(perm = var_3606, x = 
attn_output_25_cast_fp16)[name = string("transpose_147")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_3613, x = var_3607_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_3618 = const()[name = string("op_3618"), val = tensor([0, 2, 1])]; + string var_3634_pad_type_0 = const()[name = string("op_3634_pad_type_0"), val = string("valid")]; + int32 var_3634_groups_0 = const()[name = string("op_3634_groups_0"), val = int32(1)]; + tensor var_3634_strides_0 = const()[name = string("op_3634_strides_0"), val = tensor([1])]; + tensor var_3634_pad_0 = const()[name = string("op_3634_pad_0"), val = tensor([0, 0])]; + tensor var_3634_dilations_0 = const()[name = string("op_3634_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543157440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545778944))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3619_cast_fp16 = transpose(perm = var_3618, x = attn_output_27_cast_fp16)[name = string("transpose_146")]; + tensor var_3634_cast_fp16 = conv(dilations = var_3634_dilations_0, groups = var_3634_groups_0, pad = var_3634_pad_0, pad_type = var_3634_pad_type_0, strides = var_3634_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3619_cast_fp16)[name = string("op_3634_cast_fp16")]; + tensor var_3638 = const()[name = string("op_3638"), val = tensor([0, 2, 1])]; + int32 var_3644 = const()[name = string("op_3644"), val = int32(-1)]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_91_cast_fp16 = transpose(perm = var_3638, x = var_3634_cast_fp16)[name = string("transpose_145")]; + tensor var_3646_cast_fp16 = mul(x = x_91_cast_fp16, y = const_55_promoted_to_fp16)[name = 
string("op_3646_cast_fp16")]; + bool input_135_interleave_0 = const()[name = string("input_135_interleave_0"), val = bool(false)]; + tensor input_135_cast_fp16 = concat(axis = var_3644, interleave = input_135_interleave_0, values = (x_91_cast_fp16, var_3646_cast_fp16))[name = string("input_135_cast_fp16")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_3641_to_fp16 = const()[name = string("op_3641_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_3641_to_fp16, x = input_135_cast_fp16)[name = string("normed_125_cast_fp16")]; + tensor var_3651_split_sizes_0 = const()[name = string("op_3651_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3651_axis_0 = const()[name = string("op_3651_axis_0"), val = int32(-1)]; + tensor var_3651_cast_fp16_0, tensor var_3651_cast_fp16_1 = split(axis = var_3651_axis_0, split_sizes = var_3651_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_3651_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545781568)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_3651_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_93_cast_fp16")]; + int32 var_3660 = const()[name = string("op_3660"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3662_cast_fp16 = mul(x = x_93_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3662_cast_fp16")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor 
input_137_cast_fp16 = concat(axis = var_3660, interleave = input_137_interleave_0, values = (x_93_cast_fp16, var_3662_cast_fp16))[name = string("input_137_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_3657_to_fp16 = const()[name = string("op_3657_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3657_to_fp16, x = input_137_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_3667_split_sizes_0 = const()[name = string("op_3667_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3667_axis_0 = const()[name = string("op_3667_axis_0"), val = int32(-1)]; + tensor var_3667_cast_fp16_0, tensor var_3667_cast_fp16_1 = split(axis = var_3667_axis_0, split_sizes = var_3667_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3667_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545786752)))]; + tensor h_27_cast_fp16 = mul(x = var_3667_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_3678 = const()[name = string("op_3678"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_3679 = transpose(perm = var_3678, x = h_27_cast_fp16)[name = string("transpose_144")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_3679)[name = string("input_139")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_139)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_139)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_141 = mul(x = gate_19, y = up_9)[name = string("input_141")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, 
pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_141)[name = string("mlp_out_9")]; + tensor var_3719_axes_0 = const()[name = string("op_3719_axes_0"), val = tensor([2])]; + tensor var_3719 = squeeze(axes = var_3719_axes_0, x = mlp_out_9)[name = string("op_3719")]; + tensor var_3723 = const()[name = string("op_3723"), val = tensor([0, 2, 1])]; + int32 var_3729 = const()[name = string("op_3729"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor x_95 = transpose(perm = var_3723, x = var_3719)[name = string("transpose_143")]; + tensor var_3731 = mul(x = x_95, y = const_57_promoted)[name = string("op_3731")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143 = concat(axis = var_3729, interleave = input_143_interleave_0, values = (x_95, var_3731))[name = string("input_143")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_3726_to_fp16 = const()[name = string("op_3726_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3726_to_fp16, x = input_143)[name = string("normed_133_cast_fp16")]; + tensor var_3736_split_sizes_0 = const()[name = string("op_3736_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3736_axis_0 = const()[name = string("op_3736_axis_0"), val = int32(-1)]; + tensor var_3736_0, tensor var_3736_1 = split(axis = var_3736_axis_0, split_sizes = var_3736_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3736")]; + tensor hidden_states_43 = mul(x = var_3736_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_93_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = 
const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 4096])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 3, 4352])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_3764 = const()[name = string("op_3764"), val = tensor([0, 2, 1])]; + tensor input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor([2])]; + tensor var_3765 = transpose(perm = var_3764, x = hidden_states_45_cast_fp16)[name = string("transpose_142")]; + tensor input_145 = expand_dims(axes = input_145_axes_0, x = var_3765)[name = string("input_145")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_145)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_3784 = const()[name = string("op_3784"), val = tensor([0, 2, 1])]; + tensor 
per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_3785_cast_fp16 = transpose(perm = var_3784, x = per_layer_slice_9_cast_fp16)[name = string("transpose_141")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_3785_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_147_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_147_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545791936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546119680))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_147_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_3801_axes_0 = const()[name = string("op_3801_axes_0"), val = tensor([2])]; + tensor var_3801_cast_fp16 = squeeze(axes = var_3801_axes_0, x = gated_29_cast_fp16)[name = string("op_3801_cast_fp16")]; + tensor var_3805 = const()[name = string("op_3805"), val = 
tensor([0, 2, 1])]; + int32 var_3811 = const()[name = string("op_3811"), val = int32(-1)]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_97_cast_fp16 = transpose(perm = var_3805, x = var_3801_cast_fp16)[name = string("transpose_140")]; + tensor var_3813_cast_fp16 = mul(x = x_97_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_3813_cast_fp16")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149_cast_fp16 = concat(axis = var_3811, interleave = input_149_interleave_0, values = (x_97_cast_fp16, var_3813_cast_fp16))[name = string("input_149_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_3808_to_fp16 = const()[name = string("op_3808_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3808_to_fp16, x = input_149_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor var_3818_split_sizes_0 = const()[name = string("op_3818_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3818_axis_0 = const()[name = string("op_3818_axis_0"), val = int32(-1)]; + tensor var_3818_cast_fp16_0, tensor var_3818_cast_fp16_1 = split(axis = var_3818_axis_0, split_sizes = var_3818_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3818_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546122304)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_3818_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = 
string("hidden_states_51_cast_fp16")]; + tensor const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = tensor([0x1.46p-1])]; + tensor x_99_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_59_promoted_to_fp16)[name = string("x_99_cast_fp16")]; + int32 var_3833 = const()[name = string("op_3833"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3835_cast_fp16 = mul(x = x_99_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3835_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_3833, interleave = input_151_interleave_0, values = (x_99_cast_fp16, var_3835_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_3830_to_fp16 = const()[name = string("op_3830_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3830_to_fp16, x = input_151_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_3840_split_sizes_0 = const()[name = string("op_3840_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3840_axis_0 = const()[name = string("op_3840_axis_0"), val = int32(-1)]; + tensor var_3840_cast_fp16_0, tensor var_3840_cast_fp16_1 = split(axis = var_3840_axis_0, split_sizes = var_3840_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3840_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546127488)))]; + tensor h_31_cast_fp16 = mul(x = var_3840_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_3846 
= const()[name = string("op_3846"), val = tensor([0, 2, 1])]; + tensor var_3849_axes_0 = const()[name = string("op_3849_axes_0"), val = tensor([2])]; + tensor var_3847_cast_fp16 = transpose(perm = var_3846, x = h_31_cast_fp16)[name = string("transpose_139")]; + tensor var_3849_cast_fp16 = expand_dims(axes = var_3849_axes_0, x = var_3847_cast_fp16)[name = string("op_3849_cast_fp16")]; + string q_61_pad_type_0 = const()[name = string("q_61_pad_type_0"), val = string("valid")]; + tensor q_61_strides_0 = const()[name = string("q_61_strides_0"), val = tensor([1, 1])]; + tensor q_61_pad_0 = const()[name = string("q_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_61_dilations_0 = const()[name = string("q_61_dilations_0"), val = tensor([1, 1])]; + int32 q_61_groups_0 = const()[name = string("q_61_groups_0"), val = int32(1)]; + tensor q_61 = conv(dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_3849_cast_fp16)[name = string("q_61")]; + tensor var_3870 = const()[name = string("op_3870"), val = tensor([1, 8, 512, 3])]; + tensor var_3871 = reshape(shape = var_3870, x = q_61)[name = string("op_3871")]; + tensor transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3894 = const()[name = string("op_3894"), val = tensor([3, 8, 512])]; + tensor transpose_63 = transpose(perm = transpose_63_perm_0, x = var_3871)[name = string("transpose_138")]; + tensor x_101 = reshape(shape = var_3894, x = transpose_63)[name = string("x_101")]; + int32 var_3900 = const()[name = string("op_3900"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor var_3902 = mul(x = x_101, y = const_61_promoted)[name = string("op_3902")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor 
input_155 = concat(axis = var_3900, interleave = input_155_interleave_0, values = (x_101, var_3902))[name = string("input_155")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_3897_to_fp16 = const()[name = string("op_3897_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_3897_to_fp16, x = input_155)[name = string("normed_145_cast_fp16")]; + tensor var_3907_split_sizes_0 = const()[name = string("op_3907_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3907_axis_0 = const()[name = string("op_3907_axis_0"), val = int32(-1)]; + tensor var_3907_0, tensor var_3907_1 = split(axis = var_3907_axis_0, split_sizes = var_3907_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_3907")]; + tensor q_65 = mul(x = var_3907_0, y = layers_5_self_attn_q_norm_weight)[name = string("q_65")]; + tensor var_3914 = const()[name = string("op_3914"), val = tensor([1, 3, 8, 512])]; + tensor var_3915 = reshape(shape = var_3914, x = q_65)[name = string("op_3915")]; + tensor var_3920 = const()[name = string("op_3920"), val = tensor([0, 2, 1, 3])]; + tensor q_67 = transpose(perm = var_3920, x = var_3915)[name = string("transpose_137")]; + tensor var_3922_cast_fp16 = mul(x = q_67, y = cos_f)[name = string("op_3922_cast_fp16")]; + tensor var_3923_split_sizes_0 = const()[name = string("op_3923_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3923_axis_0 = const()[name = string("op_3923_axis_0"), val = int32(-1)]; + tensor var_3923_0, tensor var_3923_1 = split(axis = var_3923_axis_0, split_sizes = var_3923_split_sizes_0, x = q_67)[name = string("op_3923")]; + fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; + tensor var_3925 = mul(x = var_3923_1, y = const_62_promoted)[name = string("op_3925")]; + int32 var_3927 = const()[name = string("op_3927"), val = int32(-1)]; + bool var_3928_interleave_0 = const()[name = 
string("op_3928_interleave_0"), val = bool(false)]; + tensor var_3928 = concat(axis = var_3927, interleave = var_3928_interleave_0, values = (var_3925, var_3923_0))[name = string("op_3928")]; + tensor var_3929_cast_fp16 = mul(x = var_3928, y = sin_f)[name = string("op_3929_cast_fp16")]; + tensor q_71_cast_fp16 = add(x = var_3922_cast_fp16, y = var_3929_cast_fp16)[name = string("q_71_cast_fp16")]; + string k_31_pad_type_0 = const()[name = string("k_31_pad_type_0"), val = string("valid")]; + tensor k_31_strides_0 = const()[name = string("k_31_strides_0"), val = tensor([1, 1])]; + tensor k_31_pad_0 = const()[name = string("k_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_31_dilations_0 = const()[name = string("k_31_dilations_0"), val = tensor([1, 1])]; + int32 k_31_groups_0 = const()[name = string("k_31_groups_0"), val = int32(1)]; + tensor k_31 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = layers_5_self_attn_k_proj_weight_palettized, x = var_3849_cast_fp16)[name = string("k_31")]; + tensor var_3947 = const()[name = string("op_3947"), val = tensor([1, 2, 512, 3])]; + tensor var_3948 = reshape(shape = var_3947, x = k_31)[name = string("op_3948")]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_11_pad_type_0 = const()[name = string("v_11_pad_type_0"), val = string("valid")]; + tensor v_11_strides_0 = const()[name = string("v_11_strides_0"), val = tensor([1, 1])]; + tensor v_11_pad_0 = const()[name = string("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_11_dilations_0 = const()[name = string("v_11_dilations_0"), val = tensor([1, 1])]; + int32 v_11_groups_0 = const()[name = string("v_11_groups_0"), val = int32(1)]; + tensor v_11 = conv(dilations = v_11_dilations_0, groups = v_11_groups_0, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = v_11_strides_0, weight = 
layers_5_self_attn_v_proj_weight_palettized, x = var_3849_cast_fp16)[name = string("v_11")]; + tensor var_3975 = const()[name = string("op_3975"), val = tensor([1, 2, 512, 3])]; + tensor var_3976 = reshape(shape = var_3975, x = v_11)[name = string("op_3976")]; + tensor var_3981 = const()[name = string("op_3981"), val = tensor([0, 1, 3, 2])]; + tensor var_3999 = const()[name = string("op_3999"), val = tensor([3, 2, 512])]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = var_3948)[name = string("transpose_136")]; + tensor x_103 = reshape(shape = var_3999, x = transpose_64)[name = string("x_103")]; + int32 var_4005 = const()[name = string("op_4005"), val = int32(-1)]; + fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; + tensor var_4007 = mul(x = x_103, y = const_63_promoted)[name = string("op_4007")]; + bool input_157_interleave_0 = const()[name = string("input_157_interleave_0"), val = bool(false)]; + tensor input_157 = concat(axis = var_4005, interleave = input_157_interleave_0, values = (x_103, var_4007))[name = string("input_157")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_4002_to_fp16 = const()[name = string("op_4002_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_4002_to_fp16, x = input_157)[name = string("normed_149_cast_fp16")]; + tensor var_4012_split_sizes_0 = const()[name = string("op_4012_split_sizes_0"), val = tensor([512, 512])]; + int32 var_4012_axis_0 = const()[name = string("op_4012_axis_0"), val = int32(-1)]; + tensor var_4012_0, tensor var_4012_1 = split(axis = var_4012_axis_0, split_sizes = var_4012_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_4012")]; + tensor k_35 = mul(x = var_4012_0, y = layers_5_self_attn_k_norm_weight)[name = string("k_35")]; + tensor var_4019 = const()[name = string("op_4019"), val = tensor([1, 3, 2, 512])]; + tensor 
var_4020 = reshape(shape = var_4019, x = k_35)[name = string("op_4020")]; + tensor var_4025 = const()[name = string("op_4025"), val = tensor([0, 2, 1, 3])]; + fp16 var_4027_promoted = const()[name = string("op_4027_promoted"), val = fp16(0x1p+1)]; + tensor var_3982 = transpose(perm = var_3981, x = var_3976)[name = string("transpose_135")]; + tensor var_4028 = pow(x = var_3982, y = var_4027_promoted)[name = string("op_4028")]; + tensor var_4033_axes_0 = const()[name = string("op_4033_axes_0"), val = tensor([-1])]; + bool var_4033_keep_dims_0 = const()[name = string("op_4033_keep_dims_0"), val = bool(true)]; + tensor var_4033 = reduce_mean(axes = var_4033_axes_0, keep_dims = var_4033_keep_dims_0, x = var_4028)[name = string("op_4033")]; + fp16 var_4035_to_fp16 = const()[name = string("op_4035_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_11_cast_fp16 = add(x = var_4033, y = var_4035_to_fp16)[name = string("mean_sq_11_cast_fp16")]; + fp32 var_4037_epsilon_0 = const()[name = string("op_4037_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4037_cast_fp16 = rsqrt(epsilon = var_4037_epsilon_0, x = mean_sq_11_cast_fp16)[name = string("op_4037_cast_fp16")]; + tensor v_13_cast_fp16 = mul(x = var_3982, y = var_4037_cast_fp16)[name = string("v_13_cast_fp16")]; + tensor q_69 = transpose(perm = var_4025, x = var_4020)[name = string("transpose_134")]; + tensor var_4039_cast_fp16 = mul(x = q_69, y = cos_f)[name = string("op_4039_cast_fp16")]; + tensor var_4040_split_sizes_0 = const()[name = string("op_4040_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4040_axis_0 = const()[name = string("op_4040_axis_0"), val = int32(-1)]; + tensor var_4040_0, tensor var_4040_1 = split(axis = var_4040_axis_0, split_sizes = var_4040_split_sizes_0, x = q_69)[name = string("op_4040")]; + fp16 const_64_promoted = const()[name = string("const_64_promoted"), val = fp16(-0x1p+0)]; + tensor var_4042 = mul(x = var_4040_1, y = const_64_promoted)[name = string("op_4042")]; + int32 
var_4044 = const()[name = string("op_4044"), val = int32(-1)]; + bool var_4045_interleave_0 = const()[name = string("op_4045_interleave_0"), val = bool(false)]; + tensor var_4045 = concat(axis = var_4044, interleave = var_4045_interleave_0, values = (var_4042, var_4040_0))[name = string("op_4045")]; + tensor var_4046_cast_fp16 = mul(x = var_4045, y = sin_f)[name = string("op_4046_cast_fp16")]; + tensor k_37_cast_fp16 = add(x = var_4039_cast_fp16, y = var_4046_cast_fp16)[name = string("k_37_cast_fp16")]; + tensor var_4055_reps_0 = const()[name = string("op_4055_reps_0"), val = tensor([1, 2, 1, 1])]; + tensor var_4055_cast_fp16 = tile(reps = var_4055_reps_0, x = update_indicator)[name = string("op_4055_cast_fp16")]; + bool k_scattered_1_transpose_x_0 = const()[name = string("k_scattered_1_transpose_x_0"), val = bool(false)]; + bool k_scattered_1_transpose_y_0 = const()[name = string("k_scattered_1_transpose_y_0"), val = bool(false)]; + tensor k_scattered_1_cast_fp16 = matmul(transpose_x = k_scattered_1_transpose_x_0, transpose_y = k_scattered_1_transpose_y_0, x = var_4055_cast_fp16, y = k_37_cast_fp16)[name = string("k_scattered_1_cast_fp16")]; + bool v_scattered_1_transpose_x_0 = const()[name = string("v_scattered_1_transpose_x_0"), val = bool(false)]; + bool v_scattered_1_transpose_y_0 = const()[name = string("v_scattered_1_transpose_y_0"), val = bool(false)]; + tensor v_scattered_1_cast_fp16 = matmul(transpose_x = v_scattered_1_transpose_x_0, transpose_y = v_scattered_1_transpose_y_0, x = var_4055_cast_fp16, y = v_13_cast_fp16)[name = string("v_scattered_1_cast_fp16")]; + tensor var_4069_axes_0 = const()[name = string("op_4069_axes_0"), val = tensor([-1])]; + bool var_4069_keep_dims_0 = const()[name = string("op_4069_keep_dims_0"), val = bool(true)]; + tensor var_4069_cast_fp16 = reduce_sum(axes = var_4069_axes_0, keep_dims = var_4069_keep_dims_0, x = update_indicator)[name = string("op_4069_cast_fp16")]; + tensor slot_k_11_begin_0 = const()[name = 
string("slot_k_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_k_11_end_0 = const()[name = string("slot_k_11_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_k_11_end_mask_0 = const()[name = string("slot_k_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_11_cast_fp16 = slice_by_index(begin = slot_k_11_begin_0, end = slot_k_11_end_0, end_mask = slot_k_11_end_mask_0, x = K_full_in)[name = string("slot_k_11_cast_fp16")]; + tensor slot_v_11_begin_0 = const()[name = string("slot_v_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor slot_v_11_end_0 = const()[name = string("slot_v_11_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_v_11_end_mask_0 = const()[name = string("slot_v_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_11_cast_fp16 = slice_by_index(begin = slot_v_11_begin_0, end = slot_v_11_end_0, end_mask = slot_v_11_end_mask_0, x = V_full_in)[name = string("slot_v_11_cast_fp16")]; + fp16 var_4080_promoted_to_fp16 = const()[name = string("op_4080_promoted_to_fp16"), val = fp16(0x1p+0)]; + tensor var_4082_cast_fp16 = sub(x = var_4080_promoted_to_fp16, y = var_4069_cast_fp16)[name = string("op_4082_cast_fp16")]; + tensor var_4083_cast_fp16 = mul(x = slot_k_11_cast_fp16, y = var_4082_cast_fp16)[name = string("op_4083_cast_fp16")]; + tensor new_k_11_cast_fp16 = add(x = var_4083_cast_fp16, y = k_scattered_1_cast_fp16)[name = string("new_k_11_cast_fp16")]; + tensor var_4089_cast_fp16 = mul(x = slot_v_11_cast_fp16, y = var_4082_cast_fp16)[name = string("op_4089_cast_fp16")]; + tensor new_v_11_cast_fp16 = add(x = var_4089_cast_fp16, y = v_scattered_1_cast_fp16)[name = string("new_v_11_cast_fp16")]; + tensor var_4101_begin_0 = const()[name = string("op_4101_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_4101_end_0 = const()[name = string("op_4101_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_4101_end_mask_0 = const()[name = string("op_4101_end_mask_0"), val = tensor([true, true, 
true, true])]; + tensor var_4101_cast_fp16 = slice_by_index(begin = var_4101_begin_0, end = var_4101_end_0, end_mask = var_4101_end_mask_0, x = K_full_in)[name = string("op_4101_cast_fp16")]; + int32 var_4103 = const()[name = string("op_4103"), val = int32(0)]; + bool K_full_out_1_interleave_0 = const()[name = string("K_full_out_1_interleave_0"), val = bool(false)]; + tensor K_full_out_1_cast_fp16 = concat(axis = var_4103, interleave = K_full_out_1_interleave_0, values = (new_k_11_cast_fp16, var_4101_cast_fp16))[name = string("K_full_out_1_cast_fp16")]; + tensor var_4114_begin_0 = const()[name = string("op_4114_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_4114_end_0 = const()[name = string("op_4114_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_4114_end_mask_0 = const()[name = string("op_4114_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4114_cast_fp16 = slice_by_index(begin = var_4114_begin_0, end = var_4114_end_0, end_mask = var_4114_end_mask_0, x = V_full_in)[name = string("op_4114_cast_fp16")]; + int32 var_4116 = const()[name = string("op_4116"), val = int32(0)]; + bool V_full_out_1_interleave_0 = const()[name = string("V_full_out_1_interleave_0"), val = bool(false)]; + tensor V_full_out_1_cast_fp16 = concat(axis = var_4116, interleave = V_full_out_1_interleave_0, values = (new_v_11_cast_fp16, var_4114_cast_fp16))[name = string("V_full_out_1_cast_fp16")]; + tensor var_4122_begin_0 = const()[name = string("op_4122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4122_end_0 = const()[name = string("op_4122_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_4122_end_mask_0 = const()[name = string("op_4122_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4122_cast_fp16 = slice_by_index(begin = var_4122_begin_0, end = var_4122_end_0, end_mask = var_4122_end_mask_0, x = K_full_out_1_cast_fp16)[name = string("op_4122_cast_fp16")]; + tensor var_4132_begin_0 = const()[name = string("op_4132_begin_0"), 
val = tensor([0, 0, 0, 0])]; + tensor var_4132_end_0 = const()[name = string("op_4132_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_4132_end_mask_0 = const()[name = string("op_4132_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4132_cast_fp16 = slice_by_index(begin = var_4132_begin_0, end = var_4132_end_0, end_mask = var_4132_end_mask_0, x = V_full_out_1_cast_fp16)[name = string("op_4132_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = var_4122_cast_fp16)[name = string("transpose_133")]; + tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_22, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_132")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_23, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = var_4132_cast_fp16)[name = 
string("transpose_131")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_24, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_130")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_25, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_65_cast_fp16 = transpose(perm = transpose_65_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_129")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_71_cast_fp16, y = transpose_65_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_107_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_107_cast_fp16)[name = string("reduce_max_5")]; + 
tensor var_4167 = sub(x = x_107_cast_fp16, y = reduce_max_5)[name = string("op_4167")]; + tensor var_4173 = exp(x = var_4167)[name = string("op_4173")]; + tensor var_4183_axes_0 = const()[name = string("op_4183_axes_0"), val = tensor([-1])]; + bool var_4183_keep_dims_0 = const()[name = string("op_4183_keep_dims_0"), val = bool(true)]; + tensor var_4183 = reduce_sum(axes = var_4183_axes_0, keep_dims = var_4183_keep_dims_0, x = var_4173)[name = string("op_4183")]; + tensor var_4189_cast_fp16 = real_div(x = var_4173, y = var_4183)[name = string("op_4189_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_128")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_4189_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_4200 = const()[name = string("op_4200"), val = tensor([0, 2, 1, 3])]; + tensor var_4207 = const()[name = string("op_4207"), val = tensor([1, 3, -1])]; + tensor var_4201_cast_fp16 = transpose(perm = var_4200, x = attn_output_31_cast_fp16)[name = string("transpose_127")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_4207, x = var_4201_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_4212 = const()[name = string("op_4212"), val = tensor([0, 2, 1])]; + string var_4228_pad_type_0 = const()[name = string("op_4228_pad_type_0"), val = string("valid")]; + int32 var_4228_groups_0 = const()[name = string("op_4228_groups_0"), val = int32(1)]; + tensor var_4228_strides_0 = const()[name = string("op_4228_strides_0"), val = tensor([1])]; + tensor var_4228_pad_0 = const()[name = string("op_4228_pad_0"), 
val = tensor([0, 0])]; + tensor var_4228_dilations_0 = const()[name = string("op_4228_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546132672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551375616))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4213_cast_fp16 = transpose(perm = var_4212, x = attn_output_33_cast_fp16)[name = string("transpose_126")]; + tensor var_4228_cast_fp16 = conv(dilations = var_4228_dilations_0, groups = var_4228_groups_0, pad = var_4228_pad_0, pad_type = var_4228_pad_type_0, strides = var_4228_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4213_cast_fp16)[name = string("op_4228_cast_fp16")]; + tensor var_4232 = const()[name = string("op_4232"), val = tensor([0, 2, 1])]; + int32 var_4238 = const()[name = string("op_4238"), val = int32(-1)]; + fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_111_cast_fp16 = transpose(perm = var_4232, x = var_4228_cast_fp16)[name = string("transpose_125")]; + tensor var_4240_cast_fp16 = mul(x = x_111_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_4240_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_4238, interleave = input_161_interleave_0, values = (x_111_cast_fp16, var_4240_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_4235_to_fp16 = const()[name = string("op_4235_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_4235_to_fp16, x = input_161_cast_fp16)[name = 
string("normed_153_cast_fp16")]; + tensor var_4245_split_sizes_0 = const()[name = string("op_4245_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4245_axis_0 = const()[name = string("op_4245_axis_0"), val = int32(-1)]; + tensor var_4245_cast_fp16_0, tensor var_4245_cast_fp16_1 = split(axis = var_4245_axis_0, split_sizes = var_4245_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_4245_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551378240)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_4245_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_113_cast_fp16 = add(x = x_99_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_113_cast_fp16")]; + int32 var_4254 = const()[name = string("op_4254"), val = int32(-1)]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4256_cast_fp16 = mul(x = x_113_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_4256_cast_fp16")]; + bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; + tensor input_163_cast_fp16 = concat(axis = var_4254, interleave = input_163_interleave_0, values = (x_113_cast_fp16, var_4256_cast_fp16))[name = string("input_163_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_4251_to_fp16 = const()[name = string("op_4251_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_4251_to_fp16, x = input_163_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_4261_split_sizes_0 = const()[name = string("op_4261_split_sizes_0"), val = tensor([2560, 
2560])]; + int32 var_4261_axis_0 = const()[name = string("op_4261_axis_0"), val = int32(-1)]; + tensor var_4261_cast_fp16_0, tensor var_4261_cast_fp16_1 = split(axis = var_4261_axis_0, split_sizes = var_4261_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_4261_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551383424)))]; + tensor h_33_cast_fp16 = mul(x = var_4261_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_4272 = const()[name = string("op_4272"), val = tensor([0, 2, 1])]; + tensor input_165_axes_0 = const()[name = string("input_165_axes_0"), val = tensor([2])]; + tensor var_4273 = transpose(perm = var_4272, x = h_33_cast_fp16)[name = string("transpose_124")]; + tensor input_165 = expand_dims(axes = input_165_axes_0, x = var_4273)[name = string("input_165")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_165)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor 
up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_165)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_167 = mul(x = gate_23, y = up_11)[name = string("input_167")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_167)[name = string("mlp_out_11")]; + tensor var_4313_axes_0 = const()[name = string("op_4313_axes_0"), val = tensor([2])]; + tensor var_4313 = squeeze(axes = var_4313_axes_0, x = mlp_out_11)[name = string("op_4313")]; + tensor var_4317 = const()[name = string("op_4317"), val = tensor([0, 2, 1])]; + int32 var_4323 = const()[name = string("op_4323"), val = int32(-1)]; + fp16 const_67_promoted = const()[name = string("const_67_promoted"), val = fp16(-0x1p+0)]; + tensor x_115 = 
transpose(perm = var_4317, x = var_4313)[name = string("transpose_123")]; + tensor var_4325 = mul(x = x_115, y = const_67_promoted)[name = string("op_4325")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169 = concat(axis = var_4323, interleave = input_169_interleave_0, values = (x_115, var_4325))[name = string("input_169")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_4320_to_fp16 = const()[name = string("op_4320_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_4320_to_fp16, x = input_169)[name = string("normed_161_cast_fp16")]; + tensor var_4330_split_sizes_0 = const()[name = string("op_4330_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4330_axis_0 = const()[name = string("op_4330_axis_0"), val = int32(-1)]; + tensor var_4330_0, tensor var_4330_1 = split(axis = var_4330_axis_0, split_sizes = var_4330_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_4330")]; + tensor hidden_states_53 = mul(x = var_4330_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_113_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 4352])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 3, 4608])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_4358 = const()[name = 
string("op_4358"), val = tensor([0, 2, 1])]; + tensor input_171_axes_0 = const()[name = string("input_171_axes_0"), val = tensor([2])]; + tensor var_4359 = transpose(perm = var_4358, x = hidden_states_55_cast_fp16)[name = string("transpose_122")]; + tensor input_171 = expand_dims(axes = input_171_axes_0, x = var_4359)[name = string("input_171")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_171)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_4378 = const()[name = string("op_4378"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_4379_cast_fp16 = transpose(perm = var_4378, x = per_layer_slice_11_cast_fp16)[name = string("transpose_121")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_4379_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_173_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_173_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = 
string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551388608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551716352))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_173_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_4395_axes_0 = const()[name = string("op_4395_axes_0"), val = tensor([2])]; + tensor var_4395_cast_fp16 = squeeze(axes = var_4395_axes_0, x = gated_35_cast_fp16)[name = string("op_4395_cast_fp16")]; + tensor var_4399 = const()[name = string("op_4399"), val = tensor([0, 2, 1])]; + int32 var_4405 = const()[name = string("op_4405"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_117_cast_fp16 = transpose(perm = var_4399, x = var_4395_cast_fp16)[name = string("transpose_120")]; + tensor var_4407_cast_fp16 = mul(x = x_117_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_4407_cast_fp16")]; + bool input_175_interleave_0 = const()[name = string("input_175_interleave_0"), val = bool(false)]; + tensor input_175_cast_fp16 = concat(axis = 
var_4405, interleave = input_175_interleave_0, values = (x_117_cast_fp16, var_4407_cast_fp16))[name = string("input_175_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_4402_to_fp16 = const()[name = string("op_4402_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_4402_to_fp16, x = input_175_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_4412_split_sizes_0 = const()[name = string("op_4412_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4412_axis_0 = const()[name = string("op_4412_axis_0"), val = int32(-1)]; + tensor var_4412_cast_fp16_0, tensor var_4412_cast_fp16_1 = split(axis = var_4412_axis_0, split_sizes = var_4412_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_4412_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551718976)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_4412_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = tensor([0x1.b2p-2])]; + tensor x_119_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_69_promoted_to_fp16)[name = string("x_119_cast_fp16")]; + int32 var_4427 = const()[name = string("op_4427"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4429_cast_fp16 = mul(x = x_119_cast_fp16, y = const_70_promoted_to_fp16)[name = 
string("op_4429_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_4427, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_4429_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_4424_to_fp16 = const()[name = string("op_4424_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_4424_to_fp16, x = input_177_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_4434_split_sizes_0 = const()[name = string("op_4434_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4434_axis_0 = const()[name = string("op_4434_axis_0"), val = int32(-1)]; + tensor var_4434_cast_fp16_0, tensor var_4434_cast_fp16_1 = split(axis = var_4434_axis_0, split_sizes = var_4434_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_4434_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551724160)))]; + tensor h_37_cast_fp16 = mul(x = var_4434_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_4440 = const()[name = string("op_4440"), val = tensor([0, 2, 1])]; + tensor var_4443_axes_0 = const()[name = string("op_4443_axes_0"), val = tensor([2])]; + tensor var_4441_cast_fp16 = transpose(perm = var_4440, x = h_37_cast_fp16)[name = string("transpose_119")]; + tensor var_4443_cast_fp16 = expand_dims(axes = var_4443_axes_0, x = var_4441_cast_fp16)[name = string("op_4443_cast_fp16")]; + string q_73_pad_type_0 = const()[name = string("q_73_pad_type_0"), val = string("valid")]; + tensor q_73_strides_0 = const()[name = string("q_73_strides_0"), val = 
tensor([1, 1])]; + tensor q_73_pad_0 = const()[name = string("q_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_73_dilations_0 = const()[name = string("q_73_dilations_0"), val = tensor([1, 1])]; + int32 q_73_groups_0 = const()[name = string("q_73_groups_0"), val = int32(1)]; + tensor q_73 = conv(dilations = q_73_dilations_0, groups = q_73_groups_0, pad = q_73_pad_0, pad_type = q_73_pad_type_0, strides = q_73_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_4443_cast_fp16)[name = string("q_73")]; + tensor var_4464 = const()[name = string("op_4464"), val = tensor([1, 8, 256, 3])]; + tensor var_4465 = reshape(shape = var_4464, x = q_73)[name = string("op_4465")]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_4488 = const()[name = string("op_4488"), val = tensor([3, 8, 256])]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = var_4465)[name = string("transpose_118")]; + tensor x_121 = reshape(shape = var_4488, x = transpose_66)[name = string("x_121")]; + int32 var_4494 = const()[name = string("op_4494"), val = int32(-1)]; + fp16 const_71_promoted = const()[name = string("const_71_promoted"), val = fp16(-0x1p+0)]; + tensor var_4496 = mul(x = x_121, y = const_71_promoted)[name = string("op_4496")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181 = concat(axis = var_4494, interleave = input_181_interleave_0, values = (x_121, var_4496))[name = string("input_181")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_4491_to_fp16 = const()[name = string("op_4491_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_4491_to_fp16, x = input_181)[name = string("normed_173_cast_fp16")]; + tensor var_4501_split_sizes_0 = const()[name = string("op_4501_split_sizes_0"), val = 
tensor([256, 256])]; + int32 var_4501_axis_0 = const()[name = string("op_4501_axis_0"), val = int32(-1)]; + tensor var_4501_0, tensor var_4501_1 = split(axis = var_4501_axis_0, split_sizes = var_4501_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_4501")]; + tensor q_77 = mul(x = var_4501_0, y = layers_2_self_attn_q_norm_weight)[name = string("q_77")]; + tensor var_4508 = const()[name = string("op_4508"), val = tensor([1, 3, 8, 256])]; + tensor var_4509 = reshape(shape = var_4508, x = q_77)[name = string("op_4509")]; + tensor var_4514 = const()[name = string("op_4514"), val = tensor([0, 2, 1, 3])]; + tensor q_79 = transpose(perm = var_4514, x = var_4509)[name = string("transpose_117")]; + tensor var_4516_cast_fp16 = mul(x = q_79, y = cos_s)[name = string("op_4516_cast_fp16")]; + tensor var_4517_split_sizes_0 = const()[name = string("op_4517_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4517_axis_0 = const()[name = string("op_4517_axis_0"), val = int32(-1)]; + tensor var_4517_0, tensor var_4517_1 = split(axis = var_4517_axis_0, split_sizes = var_4517_split_sizes_0, x = q_79)[name = string("op_4517")]; + fp16 const_72_promoted = const()[name = string("const_72_promoted"), val = fp16(-0x1p+0)]; + tensor var_4519 = mul(x = var_4517_1, y = const_72_promoted)[name = string("op_4519")]; + int32 var_4521 = const()[name = string("op_4521"), val = int32(-1)]; + bool var_4522_interleave_0 = const()[name = string("op_4522_interleave_0"), val = bool(false)]; + tensor var_4522 = concat(axis = var_4521, interleave = var_4522_interleave_0, values = (var_4519, var_4517_0))[name = string("op_4522")]; + tensor var_4523_cast_fp16 = mul(x = var_4522, y = sin_s)[name = string("op_4523_cast_fp16")]; + tensor q_83_cast_fp16 = add(x = var_4516_cast_fp16, y = var_4523_cast_fp16)[name = string("q_83_cast_fp16")]; + string k_39_pad_type_0 = const()[name = string("k_39_pad_type_0"), val = string("valid")]; + tensor k_39_strides_0 = const()[name = string("k_39_strides_0"), 
val = tensor([1, 1])]; + tensor k_39_pad_0 = const()[name = string("k_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_39_dilations_0 = const()[name = string("k_39_dilations_0"), val = tensor([1, 1])]; + int32 k_39_groups_0 = const()[name = string("k_39_groups_0"), val = int32(1)]; + tensor k_39 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = layers_6_self_attn_k_proj_weight_palettized, x = var_4443_cast_fp16)[name = string("k_39")]; + tensor var_4541 = const()[name = string("op_4541"), val = tensor([1, 2, 256, 3])]; + tensor var_4542 = reshape(shape = var_4541, x = k_39)[name = string("op_4542")]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_15_pad_type_0 = const()[name = string("v_15_pad_type_0"), val = string("valid")]; + tensor v_15_strides_0 = const()[name = string("v_15_strides_0"), val = tensor([1, 1])]; + tensor v_15_pad_0 = const()[name = string("v_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_15_dilations_0 = const()[name = string("v_15_dilations_0"), val = tensor([1, 1])]; + int32 v_15_groups_0 = const()[name = string("v_15_groups_0"), val = int32(1)]; + tensor v_15 = conv(dilations = v_15_dilations_0, groups = v_15_groups_0, pad = v_15_pad_0, pad_type = v_15_pad_type_0, strides = v_15_strides_0, weight = layers_6_self_attn_v_proj_weight_palettized, x = var_4443_cast_fp16)[name = string("v_15")]; + tensor var_4569 = const()[name = string("op_4569"), val = tensor([1, 2, 256, 3])]; + tensor var_4570 = reshape(shape = var_4569, x = v_15)[name = string("op_4570")]; + tensor var_4575 = const()[name = string("op_4575"), val = tensor([0, 1, 3, 2])]; + tensor var_4593 = const()[name = string("op_4593"), val = tensor([3, 2, 256])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = var_4542)[name = string("transpose_116")]; + tensor x_123 = reshape(shape = var_4593, x = 
transpose_67)[name = string("x_123")]; + int32 var_4599 = const()[name = string("op_4599"), val = int32(-1)]; + fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; + tensor var_4601 = mul(x = x_123, y = const_73_promoted)[name = string("op_4601")]; + bool input_183_interleave_0 = const()[name = string("input_183_interleave_0"), val = bool(false)]; + tensor input_183 = concat(axis = var_4599, interleave = input_183_interleave_0, values = (x_123, var_4601))[name = string("input_183")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_4596_to_fp16 = const()[name = string("op_4596_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_4596_to_fp16, x = input_183)[name = string("normed_177_cast_fp16")]; + tensor var_4606_split_sizes_0 = const()[name = string("op_4606_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4606_axis_0 = const()[name = string("op_4606_axis_0"), val = int32(-1)]; + tensor var_4606_0, tensor var_4606_1 = split(axis = var_4606_axis_0, split_sizes = var_4606_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_4606")]; + tensor k_43 = mul(x = var_4606_0, y = layers_6_self_attn_k_norm_weight)[name = string("k_43")]; + tensor var_4613 = const()[name = string("op_4613"), val = tensor([1, 3, 2, 256])]; + tensor var_4614 = reshape(shape = var_4613, x = k_43)[name = string("op_4614")]; + tensor var_4619 = const()[name = string("op_4619"), val = tensor([0, 2, 1, 3])]; + fp16 var_4621_promoted = const()[name = string("op_4621_promoted"), val = fp16(0x1p+1)]; + tensor var_4576 = transpose(perm = var_4575, x = var_4570)[name = string("transpose_115")]; + tensor var_4622 = pow(x = var_4576, y = var_4621_promoted)[name = string("op_4622")]; + tensor var_4627_axes_0 = const()[name = string("op_4627_axes_0"), val = tensor([-1])]; + bool var_4627_keep_dims_0 = const()[name = 
string("op_4627_keep_dims_0"), val = bool(true)]; + tensor var_4627 = reduce_mean(axes = var_4627_axes_0, keep_dims = var_4627_keep_dims_0, x = var_4622)[name = string("op_4627")]; + fp16 var_4629_to_fp16 = const()[name = string("op_4629_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_13_cast_fp16 = add(x = var_4627, y = var_4629_to_fp16)[name = string("mean_sq_13_cast_fp16")]; + fp32 var_4631_epsilon_0 = const()[name = string("op_4631_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4631_cast_fp16 = rsqrt(epsilon = var_4631_epsilon_0, x = mean_sq_13_cast_fp16)[name = string("op_4631_cast_fp16")]; + tensor input_187_cast_fp16 = mul(x = var_4576, y = var_4631_cast_fp16)[name = string("input_187_cast_fp16")]; + tensor q_81 = transpose(perm = var_4619, x = var_4614)[name = string("transpose_114")]; + tensor var_4633_cast_fp16 = mul(x = q_81, y = cos_s)[name = string("op_4633_cast_fp16")]; + tensor var_4634_split_sizes_0 = const()[name = string("op_4634_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4634_axis_0 = const()[name = string("op_4634_axis_0"), val = int32(-1)]; + tensor var_4634_0, tensor var_4634_1 = split(axis = var_4634_axis_0, split_sizes = var_4634_split_sizes_0, x = q_81)[name = string("op_4634")]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor var_4636 = mul(x = var_4634_1, y = const_74_promoted)[name = string("op_4636")]; + int32 var_4638 = const()[name = string("op_4638"), val = int32(-1)]; + bool var_4639_interleave_0 = const()[name = string("op_4639_interleave_0"), val = bool(false)]; + tensor var_4639 = concat(axis = var_4638, interleave = var_4639_interleave_0, values = (var_4636, var_4634_0))[name = string("op_4639")]; + tensor var_4640_cast_fp16 = mul(x = var_4639, y = sin_s)[name = string("op_4640_cast_fp16")]; + tensor input_185_cast_fp16 = add(x = var_4633_cast_fp16, y = var_4640_cast_fp16)[name = string("input_185_cast_fp16")]; + tensor k_padded_11_pad_0 = 
const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; + fp16 const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_11_cast_fp16 = pad(constant_val = const_75_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_185_cast_fp16)[name = string("k_padded_11_cast_fp16")]; + tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; + fp16 const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_11_cast_fp16 = pad(constant_val = const_76_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = input_187_cast_fp16)[name = string("v_padded_11_cast_fp16")]; + tensor slot_k_13_begin_0 = const()[name = string("slot_k_13_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor slot_k_13_end_0 = const()[name = string("slot_k_13_end_0"), val = tensor([6, 2, 512, 512])]; + tensor slot_k_13_end_mask_0 = const()[name = string("slot_k_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_13_cast_fp16 = slice_by_index(begin = slot_k_13_begin_0, end = slot_k_13_end_0, end_mask = slot_k_13_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("slot_k_13_cast_fp16")]; + tensor slot_v_13_begin_0 = const()[name = string("slot_v_13_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor slot_v_13_end_0 = const()[name = string("slot_v_13_end_0"), val = tensor([6, 2, 512, 512])]; + tensor slot_v_13_end_mask_0 = const()[name = string("slot_v_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_13_cast_fp16 = slice_by_index(begin = slot_v_13_begin_0, end = slot_v_13_end_0, end_mask = slot_v_13_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = 
string("slot_v_13_cast_fp16")]; + tensor var_4679_begin_0 = const()[name = string("op_4679_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_4679_end_0 = const()[name = string("op_4679_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4679_end_mask_0 = const()[name = string("op_4679_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4679_cast_fp16 = slice_by_index(begin = var_4679_begin_0, end = var_4679_end_0, end_mask = var_4679_end_mask_0, x = slot_k_13_cast_fp16)[name = string("op_4679_cast_fp16")]; + int32 var_4686 = const()[name = string("op_4686"), val = int32(2)]; + bool new_k_13_interleave_0 = const()[name = string("new_k_13_interleave_0"), val = bool(false)]; + tensor new_k_13_cast_fp16 = concat(axis = var_4686, interleave = new_k_13_interleave_0, values = (var_4679_cast_fp16, k_padded_11_cast_fp16))[name = string("new_k_13_cast_fp16")]; + tensor var_4702_begin_0 = const()[name = string("op_4702_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_4702_end_0 = const()[name = string("op_4702_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4702_end_mask_0 = const()[name = string("op_4702_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4702_cast_fp16 = slice_by_index(begin = var_4702_begin_0, end = var_4702_end_0, end_mask = var_4702_end_mask_0, x = slot_v_13_cast_fp16)[name = string("op_4702_cast_fp16")]; + int32 var_4709 = const()[name = string("op_4709"), val = int32(2)]; + bool new_v_13_interleave_0 = const()[name = string("new_v_13_interleave_0"), val = bool(false)]; + tensor new_v_13_cast_fp16 = concat(axis = var_4709, interleave = new_v_13_interleave_0, values = (var_4702_cast_fp16, v_padded_11_cast_fp16))[name = string("new_v_13_cast_fp16")]; + tensor var_4715_begin_0 = const()[name = string("op_4715_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4715_end_0 = const()[name = string("op_4715_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_4715_end_mask_0 = const()[name = 
string("op_4715_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4715_cast_fp16 = slice_by_index(begin = var_4715_begin_0, end = var_4715_end_0, end_mask = var_4715_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("op_4715_cast_fp16")]; + tensor var_4720_begin_0 = const()[name = string("op_4720_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4720_end_0 = const()[name = string("op_4720_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_4720_end_mask_0 = const()[name = string("op_4720_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4720_cast_fp16 = slice_by_index(begin = var_4720_begin_0, end = var_4720_end_0, end_mask = var_4720_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("op_4720_cast_fp16")]; + int32 var_4722 = const()[name = string("op_4722"), val = int32(0)]; + bool K_sliding_out_11_interleave_0 = const()[name = string("K_sliding_out_11_interleave_0"), val = bool(false)]; + tensor K_sliding_out_11_cast_fp16 = concat(axis = var_4722, interleave = K_sliding_out_11_interleave_0, values = (var_4715_cast_fp16, new_k_13_cast_fp16, var_4720_cast_fp16))[name = string("K_sliding_out_11_cast_fp16")]; + tensor var_4728_begin_0 = const()[name = string("op_4728_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4728_end_0 = const()[name = string("op_4728_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_4728_end_mask_0 = const()[name = string("op_4728_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4728_cast_fp16 = slice_by_index(begin = var_4728_begin_0, end = var_4728_end_0, end_mask = var_4728_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("op_4728_cast_fp16")]; + tensor var_4733_begin_0 = const()[name = string("op_4733_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4733_end_0 = const()[name = string("op_4733_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_4733_end_mask_0 = const()[name = string("op_4733_end_mask_0"), val = tensor([true, true, true, 
true])]; + tensor var_4733_cast_fp16 = slice_by_index(begin = var_4733_begin_0, end = var_4733_end_0, end_mask = var_4733_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("op_4733_cast_fp16")]; + int32 var_4735 = const()[name = string("op_4735"), val = int32(0)]; + bool V_sliding_out_11_interleave_0 = const()[name = string("V_sliding_out_11_interleave_0"), val = bool(false)]; + tensor V_sliding_out_11_cast_fp16 = concat(axis = var_4735, interleave = V_sliding_out_11_interleave_0, values = (var_4728_cast_fp16, new_v_13_cast_fp16, var_4733_cast_fp16))[name = string("V_sliding_out_11_cast_fp16")]; + tensor var_4741_begin_0 = const()[name = string("op_4741_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4741_end_0 = const()[name = string("op_4741_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4741_end_mask_0 = const()[name = string("op_4741_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4741_cast_fp16 = slice_by_index(begin = var_4741_begin_0, end = var_4741_end_0, end_mask = var_4741_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("op_4741_cast_fp16")]; + tensor K_for_attn_13_begin_0 = const()[name = string("K_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_13_end_0 = const()[name = string("K_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_13_end_mask_0 = const()[name = string("K_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_13_cast_fp16 = slice_by_index(begin = K_for_attn_13_begin_0, end = K_for_attn_13_end_0, end_mask = K_for_attn_13_end_mask_0, x = var_4741_cast_fp16)[name = string("K_for_attn_13_cast_fp16")]; + tensor var_4751_begin_0 = const()[name = string("op_4751_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4751_end_0 = const()[name = string("op_4751_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4751_end_mask_0 = const()[name = string("op_4751_end_mask_0"), val = tensor([false, true, true, true])]; + 
tensor var_4751_cast_fp16 = slice_by_index(begin = var_4751_begin_0, end = var_4751_end_0, end_mask = var_4751_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("op_4751_cast_fp16")]; + tensor V_for_attn_13_begin_0 = const()[name = string("V_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_13_end_0 = const()[name = string("V_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_13_end_mask_0 = const()[name = string("V_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_13_cast_fp16 = slice_by_index(begin = V_for_attn_13_begin_0, end = V_for_attn_13_end_0, end_mask = V_for_attn_13_end_mask_0, x = var_4751_cast_fp16)[name = string("V_for_attn_13_cast_fp16")]; + tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_12_reps_0 = const()[name = string("tile_12_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = K_for_attn_13_cast_fp16)[name = string("transpose_113")]; + tensor tile_12_cast_fp16 = tile(reps = tile_12_reps_0, x = transpose_24_cast_fp16)[name = string("tile_12_cast_fp16")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_26, x = tile_12_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_24_cast_fp16)[name = string("transpose_112")]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_27, x = transpose_25_cast_fp16)[name = string("reshape_25_cast_fp16")]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([1, 0, -1, -2])]; + 
tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_13_reps_0 = const()[name = string("tile_13_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = V_for_attn_13_cast_fp16)[name = string("transpose_111")]; + tensor tile_13_cast_fp16 = tile(reps = tile_13_reps_0, x = transpose_26_cast_fp16)[name = string("tile_13_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_28, x = tile_13_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_110")]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_29, x = transpose_27_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor V_expanded_13_perm_0 = const()[name = string("V_expanded_13_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor transpose_68_cast_fp16 = transpose(perm = transpose_68_perm_0, x = reshape_25_cast_fp16)[name = string("transpose_109")]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_83_cast_fp16, y = transpose_68_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_127_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = 
string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_127_cast_fp16)[name = string("reduce_max_6")]; + tensor var_4786 = sub(x = x_127_cast_fp16, y = reduce_max_6)[name = string("op_4786")]; + tensor var_4792 = exp(x = var_4786)[name = string("op_4792")]; + tensor var_4802_axes_0 = const()[name = string("op_4802_axes_0"), val = tensor([-1])]; + bool var_4802_keep_dims_0 = const()[name = string("op_4802_keep_dims_0"), val = bool(true)]; + tensor var_4802 = reduce_sum(axes = var_4802_axes_0, keep_dims = var_4802_keep_dims_0, x = var_4792)[name = string("op_4802")]; + tensor var_4808_cast_fp16 = real_div(x = var_4792, y = var_4802)[name = string("op_4808_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor V_expanded_13_cast_fp16 = transpose(perm = V_expanded_13_perm_0, x = reshape_27_cast_fp16)[name = string("transpose_108")]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_4808_cast_fp16, y = V_expanded_13_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_4819 = const()[name = string("op_4819"), val = tensor([0, 2, 1, 3])]; + tensor var_4826 = const()[name = string("op_4826"), val = tensor([1, 3, -1])]; + tensor var_4820_cast_fp16 = transpose(perm = var_4819, x = attn_output_37_cast_fp16)[name = string("transpose_107")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_4826, x = var_4820_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_4831 = const()[name = string("op_4831"), val = tensor([0, 2, 1])]; + string var_4847_pad_type_0 = 
const()[name = string("op_4847_pad_type_0"), val = string("valid")]; + int32 var_4847_groups_0 = const()[name = string("op_4847_groups_0"), val = int32(1)]; + tensor var_4847_strides_0 = const()[name = string("op_4847_strides_0"), val = tensor([1])]; + tensor var_4847_pad_0 = const()[name = string("op_4847_pad_0"), val = tensor([0, 0])]; + tensor var_4847_dilations_0 = const()[name = string("op_4847_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551729344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554350848))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4832_cast_fp16 = transpose(perm = var_4831, x = attn_output_39_cast_fp16)[name = string("transpose_106")]; + tensor var_4847_cast_fp16 = conv(dilations = var_4847_dilations_0, groups = var_4847_groups_0, pad = var_4847_pad_0, pad_type = var_4847_pad_type_0, strides = var_4847_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4832_cast_fp16)[name = string("op_4847_cast_fp16")]; + tensor var_4851 = const()[name = string("op_4851"), val = tensor([0, 2, 1])]; + int32 var_4857 = const()[name = string("op_4857"), val = int32(-1)]; + fp16 const_77_promoted_to_fp16 = const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_131_cast_fp16 = transpose(perm = var_4851, x = var_4847_cast_fp16)[name = string("transpose_105")]; + tensor var_4859_cast_fp16 = mul(x = x_131_cast_fp16, y = const_77_promoted_to_fp16)[name = string("op_4859_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_4857, interleave = input_191_interleave_0, values = (x_131_cast_fp16, var_4859_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor 
normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_4854_to_fp16 = const()[name = string("op_4854_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_4854_to_fp16, x = input_191_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_4864_split_sizes_0 = const()[name = string("op_4864_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4864_axis_0 = const()[name = string("op_4864_axis_0"), val = int32(-1)]; + tensor var_4864_cast_fp16_0, tensor var_4864_cast_fp16_1 = split(axis = var_4864_axis_0, split_sizes = var_4864_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_4864_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554353472)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_4864_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_119_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_133_cast_fp16")]; + int32 var_4873 = const()[name = string("op_4873"), val = int32(-1)]; + fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4875_cast_fp16 = mul(x = x_133_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4875_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_4873, interleave = input_193_interleave_0, values = (x_133_cast_fp16, var_4875_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_4870_to_fp16 = const()[name = 
string("op_4870_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_4870_to_fp16, x = input_193_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor var_4880_split_sizes_0 = const()[name = string("op_4880_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4880_axis_0 = const()[name = string("op_4880_axis_0"), val = int32(-1)]; + tensor var_4880_cast_fp16_0, tensor var_4880_cast_fp16_1 = split(axis = var_4880_axis_0, split_sizes = var_4880_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_4880_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554358656)))]; + tensor h_39_cast_fp16 = mul(x = var_4880_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_4891 = const()[name = string("op_4891"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_4892 = transpose(perm = var_4891, x = h_39_cast_fp16)[name = string("transpose_104")]; + tensor input_195 = expand_dims(axes = input_195_axes_0, x = var_4892)[name = string("input_195")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, 
strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_195)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_195)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_197 = mul(x = gate_27, y = up_13)[name = string("input_197")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_197)[name = string("mlp_out_13")]; + tensor var_4932_axes_0 = const()[name = string("op_4932_axes_0"), val = tensor([2])]; + tensor var_4932 = squeeze(axes = 
var_4932_axes_0, x = mlp_out_13)[name = string("op_4932")]; + tensor var_4936 = const()[name = string("op_4936"), val = tensor([0, 2, 1])]; + int32 var_4942 = const()[name = string("op_4942"), val = int32(-1)]; + fp16 const_79_promoted = const()[name = string("const_79_promoted"), val = fp16(-0x1p+0)]; + tensor x_135 = transpose(perm = var_4936, x = var_4932)[name = string("transpose_103")]; + tensor var_4944 = mul(x = x_135, y = const_79_promoted)[name = string("op_4944")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199 = concat(axis = var_4942, interleave = input_199_interleave_0, values = (x_135, var_4944))[name = string("input_199")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_4939_to_fp16 = const()[name = string("op_4939_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_4939_to_fp16, x = input_199)[name = string("normed_189_cast_fp16")]; + tensor var_4949_split_sizes_0 = const()[name = string("op_4949_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4949_axis_0 = const()[name = string("op_4949_axis_0"), val = int32(-1)]; + tensor var_4949_0, tensor var_4949_1 = split(axis = var_4949_axis_0, split_sizes = var_4949_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_4949")]; + tensor hidden_states_63 = mul(x = var_4949_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 4608])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 3, 4864])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = 
string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_4977 = const()[name = string("op_4977"), val = tensor([0, 2, 1])]; + tensor input_201_axes_0 = const()[name = string("input_201_axes_0"), val = tensor([2])]; + tensor var_4978 = transpose(perm = var_4977, x = hidden_states_65_cast_fp16)[name = string("transpose_102")]; + tensor input_201 = expand_dims(axes = input_201_axes_0, x = var_4978)[name = string("input_201")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_201)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_4997 = const()[name = string("op_4997"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_4998_cast_fp16 = transpose(perm = var_4997, x = per_layer_slice_13_cast_fp16)[name = string("transpose_101")]; + tensor 
per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_4998_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_203_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_203_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554363840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554691584))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_203_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_5014_axes_0 = const()[name = string("op_5014_axes_0"), val = tensor([2])]; + tensor var_5014_cast_fp16 = squeeze(axes = var_5014_axes_0, x = gated_41_cast_fp16)[name = string("op_5014_cast_fp16")]; + tensor var_5018 = const()[name = string("op_5018"), val = tensor([0, 2, 1])]; + int32 var_5024 = const()[name = string("op_5024"), val = int32(-1)]; + fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_137_cast_fp16 = transpose(perm = 
var_5018, x = var_5014_cast_fp16)[name = string("transpose_100")]; + tensor var_5026_cast_fp16 = mul(x = x_137_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_5026_cast_fp16")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205_cast_fp16 = concat(axis = var_5024, interleave = input_205_interleave_0, values = (x_137_cast_fp16, var_5026_cast_fp16))[name = string("input_205_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_5021_to_fp16 = const()[name = string("op_5021_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_5021_to_fp16, x = input_205_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_5031_split_sizes_0 = const()[name = string("op_5031_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5031_axis_0 = const()[name = string("op_5031_axis_0"), val = int32(-1)]; + tensor var_5031_cast_fp16_0, tensor var_5031_cast_fp16_1 = split(axis = var_5031_axis_0, split_sizes = var_5031_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_5031_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554694208)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_5031_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_81_promoted_to_fp16 = const()[name = string("const_81_promoted_to_fp16"), val = tensor([0x1.16p-1])]; + tensor x_139_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = 
const_81_promoted_to_fp16)[name = string("x_139_cast_fp16")]; + int32 var_5046 = const()[name = string("op_5046"), val = int32(-1)]; + fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5048_cast_fp16 = mul(x = x_139_cast_fp16, y = const_82_promoted_to_fp16)[name = string("op_5048_cast_fp16")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207_cast_fp16 = concat(axis = var_5046, interleave = input_207_interleave_0, values = (x_139_cast_fp16, var_5048_cast_fp16))[name = string("input_207_cast_fp16")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_5043_to_fp16 = const()[name = string("op_5043_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_5043_to_fp16, x = input_207_cast_fp16)[name = string("normed_197_cast_fp16")]; + tensor var_5053_split_sizes_0 = const()[name = string("op_5053_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5053_axis_0 = const()[name = string("op_5053_axis_0"), val = int32(-1)]; + tensor var_5053_cast_fp16_0, tensor var_5053_cast_fp16_1 = split(axis = var_5053_axis_0, split_sizes = var_5053_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_5053_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554699392)))]; + tensor h_43_cast_fp16 = mul(x = var_5053_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_5059 = const()[name = string("op_5059"), val = tensor([0, 2, 1])]; + tensor var_5062_axes_0 = const()[name = string("op_5062_axes_0"), val = tensor([2])]; + tensor var_5060_cast_fp16 = transpose(perm = var_5059, x = 
h_43_cast_fp16)[name = string("transpose_99")]; + tensor var_5062_cast_fp16 = expand_dims(axes = var_5062_axes_0, x = var_5060_cast_fp16)[name = string("op_5062_cast_fp16")]; + string q_85_pad_type_0 = const()[name = string("q_85_pad_type_0"), val = string("valid")]; + tensor q_85_strides_0 = const()[name = string("q_85_strides_0"), val = tensor([1, 1])]; + tensor q_85_pad_0 = const()[name = string("q_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_85_dilations_0 = const()[name = string("q_85_dilations_0"), val = tensor([1, 1])]; + int32 q_85_groups_0 = const()[name = string("q_85_groups_0"), val = int32(1)]; + tensor q_85 = conv(dilations = q_85_dilations_0, groups = q_85_groups_0, pad = q_85_pad_0, pad_type = q_85_pad_type_0, strides = q_85_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_5062_cast_fp16)[name = string("q_85")]; + tensor var_5083 = const()[name = string("op_5083"), val = tensor([1, 8, 256, 3])]; + tensor var_5084 = reshape(shape = var_5083, x = q_85)[name = string("op_5084")]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_5107 = const()[name = string("op_5107"), val = tensor([3, 8, 256])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = var_5084)[name = string("transpose_98")]; + tensor x_141 = reshape(shape = var_5107, x = transpose_69)[name = string("x_141")]; + int32 var_5113 = const()[name = string("op_5113"), val = int32(-1)]; + fp16 const_83_promoted = const()[name = string("const_83_promoted"), val = fp16(-0x1p+0)]; + tensor var_5115 = mul(x = x_141, y = const_83_promoted)[name = string("op_5115")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211 = concat(axis = var_5113, interleave = input_211_interleave_0, values = (x_141, var_5115))[name = string("input_211")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = 
tensor([-1])]; + fp16 var_5110_to_fp16 = const()[name = string("op_5110_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_5110_to_fp16, x = input_211)[name = string("normed_201_cast_fp16")]; + tensor var_5120_split_sizes_0 = const()[name = string("op_5120_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5120_axis_0 = const()[name = string("op_5120_axis_0"), val = int32(-1)]; + tensor var_5120_0, tensor var_5120_1 = split(axis = var_5120_axis_0, split_sizes = var_5120_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_5120")]; + tensor q_89 = mul(x = var_5120_0, y = layers_7_self_attn_q_norm_weight)[name = string("q_89")]; + tensor var_5127 = const()[name = string("op_5127"), val = tensor([1, 3, 8, 256])]; + tensor var_5128 = reshape(shape = var_5127, x = q_89)[name = string("op_5128")]; + tensor var_5133 = const()[name = string("op_5133"), val = tensor([0, 2, 1, 3])]; + tensor q_91 = transpose(perm = var_5133, x = var_5128)[name = string("transpose_97")]; + tensor var_5135_cast_fp16 = mul(x = q_91, y = cos_s)[name = string("op_5135_cast_fp16")]; + tensor var_5136_split_sizes_0 = const()[name = string("op_5136_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5136_axis_0 = const()[name = string("op_5136_axis_0"), val = int32(-1)]; + tensor var_5136_0, tensor var_5136_1 = split(axis = var_5136_axis_0, split_sizes = var_5136_split_sizes_0, x = q_91)[name = string("op_5136")]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_5138 = mul(x = var_5136_1, y = const_84_promoted)[name = string("op_5138")]; + int32 var_5140 = const()[name = string("op_5140"), val = int32(-1)]; + bool var_5141_interleave_0 = const()[name = string("op_5141_interleave_0"), val = bool(false)]; + tensor var_5141 = concat(axis = var_5140, interleave = var_5141_interleave_0, values = (var_5138, var_5136_0))[name = string("op_5141")]; + tensor 
var_5142_cast_fp16 = mul(x = var_5141, y = sin_s)[name = string("op_5142_cast_fp16")]; + tensor q_95_cast_fp16 = add(x = var_5135_cast_fp16, y = var_5142_cast_fp16)[name = string("q_95_cast_fp16")]; + string k_45_pad_type_0 = const()[name = string("k_45_pad_type_0"), val = string("valid")]; + tensor k_45_strides_0 = const()[name = string("k_45_strides_0"), val = tensor([1, 1])]; + tensor k_45_pad_0 = const()[name = string("k_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_45_dilations_0 = const()[name = string("k_45_dilations_0"), val = tensor([1, 1])]; + int32 k_45_groups_0 = const()[name = string("k_45_groups_0"), val = int32(1)]; + tensor k_45 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = layers_7_self_attn_k_proj_weight_palettized, x = var_5062_cast_fp16)[name = string("k_45")]; + tensor var_5160 = const()[name = string("op_5160"), val = tensor([1, 2, 256, 3])]; + tensor var_5161 = reshape(shape = var_5160, x = k_45)[name = string("op_5161")]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_17_pad_type_0 = const()[name = string("v_17_pad_type_0"), val = string("valid")]; + tensor v_17_strides_0 = const()[name = string("v_17_strides_0"), val = tensor([1, 1])]; + tensor v_17_pad_0 = const()[name = string("v_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_17_dilations_0 = const()[name = string("v_17_dilations_0"), val = tensor([1, 1])]; + int32 v_17_groups_0 = const()[name = string("v_17_groups_0"), val = int32(1)]; + tensor v_17 = conv(dilations = v_17_dilations_0, groups = v_17_groups_0, pad = v_17_pad_0, pad_type = v_17_pad_type_0, strides = v_17_strides_0, weight = layers_7_self_attn_v_proj_weight_palettized, x = var_5062_cast_fp16)[name = string("v_17")]; + tensor var_5188 = const()[name = string("op_5188"), val = tensor([1, 2, 256, 3])]; + tensor var_5189 = reshape(shape = var_5188, x = 
v_17)[name = string("op_5189")]; + tensor var_5194 = const()[name = string("op_5194"), val = tensor([0, 1, 3, 2])]; + tensor var_5212 = const()[name = string("op_5212"), val = tensor([3, 2, 256])]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = var_5161)[name = string("transpose_96")]; + tensor x_143 = reshape(shape = var_5212, x = transpose_70)[name = string("x_143")]; + int32 var_5218 = const()[name = string("op_5218"), val = int32(-1)]; + fp16 const_85_promoted = const()[name = string("const_85_promoted"), val = fp16(-0x1p+0)]; + tensor var_5220 = mul(x = x_143, y = const_85_promoted)[name = string("op_5220")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213 = concat(axis = var_5218, interleave = input_213_interleave_0, values = (x_143, var_5220))[name = string("input_213")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_5215_to_fp16 = const()[name = string("op_5215_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_5215_to_fp16, x = input_213)[name = string("normed_205_cast_fp16")]; + tensor var_5225_split_sizes_0 = const()[name = string("op_5225_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5225_axis_0 = const()[name = string("op_5225_axis_0"), val = int32(-1)]; + tensor var_5225_0, tensor var_5225_1 = split(axis = var_5225_axis_0, split_sizes = var_5225_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_5225")]; + tensor k_49 = mul(x = var_5225_0, y = layers_7_self_attn_k_norm_weight)[name = string("k_49")]; + tensor var_5232 = const()[name = string("op_5232"), val = tensor([1, 3, 2, 256])]; + tensor var_5233 = reshape(shape = var_5232, x = k_49)[name = string("op_5233")]; + tensor var_5238 = const()[name = string("op_5238"), val = tensor([0, 2, 1, 3])]; + fp16 var_5240_promoted = const()[name = string("op_5240_promoted"), val = 
fp16(0x1p+1)]; + tensor var_5195 = transpose(perm = var_5194, x = var_5189)[name = string("transpose_95")]; + tensor var_5241 = pow(x = var_5195, y = var_5240_promoted)[name = string("op_5241")]; + tensor var_5246_axes_0 = const()[name = string("op_5246_axes_0"), val = tensor([-1])]; + bool var_5246_keep_dims_0 = const()[name = string("op_5246_keep_dims_0"), val = bool(true)]; + tensor var_5246 = reduce_mean(axes = var_5246_axes_0, keep_dims = var_5246_keep_dims_0, x = var_5241)[name = string("op_5246")]; + fp16 var_5248_to_fp16 = const()[name = string("op_5248_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_15_cast_fp16 = add(x = var_5246, y = var_5248_to_fp16)[name = string("mean_sq_15_cast_fp16")]; + fp32 var_5250_epsilon_0 = const()[name = string("op_5250_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5250_cast_fp16 = rsqrt(epsilon = var_5250_epsilon_0, x = mean_sq_15_cast_fp16)[name = string("op_5250_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_5195, y = var_5250_cast_fp16)[name = string("input_217_cast_fp16")]; + tensor q_93 = transpose(perm = var_5238, x = var_5233)[name = string("transpose_94")]; + tensor var_5252_cast_fp16 = mul(x = q_93, y = cos_s)[name = string("op_5252_cast_fp16")]; + tensor var_5253_split_sizes_0 = const()[name = string("op_5253_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5253_axis_0 = const()[name = string("op_5253_axis_0"), val = int32(-1)]; + tensor var_5253_0, tensor var_5253_1 = split(axis = var_5253_axis_0, split_sizes = var_5253_split_sizes_0, x = q_93)[name = string("op_5253")]; + fp16 const_86_promoted = const()[name = string("const_86_promoted"), val = fp16(-0x1p+0)]; + tensor var_5255 = mul(x = var_5253_1, y = const_86_promoted)[name = string("op_5255")]; + int32 var_5257 = const()[name = string("op_5257"), val = int32(-1)]; + bool var_5258_interleave_0 = const()[name = string("op_5258_interleave_0"), val = bool(false)]; + tensor var_5258 = concat(axis = var_5257, interleave = 
var_5258_interleave_0, values = (var_5255, var_5253_0))[name = string("op_5258")]; + tensor var_5259_cast_fp16 = mul(x = var_5258, y = sin_s)[name = string("op_5259_cast_fp16")]; + tensor input_215_cast_fp16 = add(x = var_5252_cast_fp16, y = var_5259_cast_fp16)[name = string("input_215_cast_fp16")]; + tensor k_padded_13_pad_0 = const()[name = string("k_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_13_mode_0 = const()[name = string("k_padded_13_mode_0"), val = string("constant")]; + fp16 const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_13_cast_fp16 = pad(constant_val = const_87_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_215_cast_fp16)[name = string("k_padded_13_cast_fp16")]; + tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; + fp16 const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_13_cast_fp16 = pad(constant_val = const_88_to_fp16, mode = v_padded_13_mode_0, pad = v_padded_13_pad_0, x = input_217_cast_fp16)[name = string("v_padded_13_cast_fp16")]; + tensor slot_k_15_begin_0 = const()[name = string("slot_k_15_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor slot_k_15_end_0 = const()[name = string("slot_k_15_end_0"), val = tensor([7, 2, 512, 512])]; + tensor slot_k_15_end_mask_0 = const()[name = string("slot_k_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_15_cast_fp16 = slice_by_index(begin = slot_k_15_begin_0, end = slot_k_15_end_0, end_mask = slot_k_15_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("slot_k_15_cast_fp16")]; + tensor slot_v_15_begin_0 = const()[name = string("slot_v_15_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor slot_v_15_end_0 = const()[name = string("slot_v_15_end_0"), val = 
tensor([7, 2, 512, 512])]; + tensor slot_v_15_end_mask_0 = const()[name = string("slot_v_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_15_cast_fp16 = slice_by_index(begin = slot_v_15_begin_0, end = slot_v_15_end_0, end_mask = slot_v_15_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("slot_v_15_cast_fp16")]; + tensor var_5298_begin_0 = const()[name = string("op_5298_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5298_end_0 = const()[name = string("op_5298_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5298_end_mask_0 = const()[name = string("op_5298_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5298_cast_fp16 = slice_by_index(begin = var_5298_begin_0, end = var_5298_end_0, end_mask = var_5298_end_mask_0, x = slot_k_15_cast_fp16)[name = string("op_5298_cast_fp16")]; + int32 var_5305 = const()[name = string("op_5305"), val = int32(2)]; + bool new_k_15_interleave_0 = const()[name = string("new_k_15_interleave_0"), val = bool(false)]; + tensor new_k_15_cast_fp16 = concat(axis = var_5305, interleave = new_k_15_interleave_0, values = (var_5298_cast_fp16, k_padded_13_cast_fp16))[name = string("new_k_15_cast_fp16")]; + tensor var_5321_begin_0 = const()[name = string("op_5321_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5321_end_0 = const()[name = string("op_5321_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5321_end_mask_0 = const()[name = string("op_5321_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5321_cast_fp16 = slice_by_index(begin = var_5321_begin_0, end = var_5321_end_0, end_mask = var_5321_end_mask_0, x = slot_v_15_cast_fp16)[name = string("op_5321_cast_fp16")]; + int32 var_5328 = const()[name = string("op_5328"), val = int32(2)]; + bool new_v_15_interleave_0 = const()[name = string("new_v_15_interleave_0"), val = bool(false)]; + tensor new_v_15_cast_fp16 = concat(axis = var_5328, interleave = new_v_15_interleave_0, values = (var_5321_cast_fp16, 
v_padded_13_cast_fp16))[name = string("new_v_15_cast_fp16")]; + tensor var_5334_begin_0 = const()[name = string("op_5334_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5334_end_0 = const()[name = string("op_5334_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_5334_end_mask_0 = const()[name = string("op_5334_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5334_cast_fp16 = slice_by_index(begin = var_5334_begin_0, end = var_5334_end_0, end_mask = var_5334_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("op_5334_cast_fp16")]; + tensor var_5339_begin_0 = const()[name = string("op_5339_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5339_end_0 = const()[name = string("op_5339_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_5339_end_mask_0 = const()[name = string("op_5339_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5339_cast_fp16 = slice_by_index(begin = var_5339_begin_0, end = var_5339_end_0, end_mask = var_5339_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("op_5339_cast_fp16")]; + int32 var_5341 = const()[name = string("op_5341"), val = int32(0)]; + bool K_sliding_out_13_interleave_0 = const()[name = string("K_sliding_out_13_interleave_0"), val = bool(false)]; + tensor K_sliding_out_13_cast_fp16 = concat(axis = var_5341, interleave = K_sliding_out_13_interleave_0, values = (var_5334_cast_fp16, new_k_15_cast_fp16, var_5339_cast_fp16))[name = string("K_sliding_out_13_cast_fp16")]; + tensor var_5347_begin_0 = const()[name = string("op_5347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5347_end_0 = const()[name = string("op_5347_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_5347_end_mask_0 = const()[name = string("op_5347_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5347_cast_fp16 = slice_by_index(begin = var_5347_begin_0, end = var_5347_end_0, end_mask = var_5347_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("op_5347_cast_fp16")]; 
+ tensor var_5352_begin_0 = const()[name = string("op_5352_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5352_end_0 = const()[name = string("op_5352_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_5352_end_mask_0 = const()[name = string("op_5352_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5352_cast_fp16 = slice_by_index(begin = var_5352_begin_0, end = var_5352_end_0, end_mask = var_5352_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("op_5352_cast_fp16")]; + int32 var_5354 = const()[name = string("op_5354"), val = int32(0)]; + bool V_sliding_out_13_interleave_0 = const()[name = string("V_sliding_out_13_interleave_0"), val = bool(false)]; + tensor V_sliding_out_13_cast_fp16 = concat(axis = var_5354, interleave = V_sliding_out_13_interleave_0, values = (var_5347_cast_fp16, new_v_15_cast_fp16, var_5352_cast_fp16))[name = string("V_sliding_out_13_cast_fp16")]; + tensor var_5360_begin_0 = const()[name = string("op_5360_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5360_end_0 = const()[name = string("op_5360_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5360_end_mask_0 = const()[name = string("op_5360_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5360_cast_fp16 = slice_by_index(begin = var_5360_begin_0, end = var_5360_end_0, end_mask = var_5360_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("op_5360_cast_fp16")]; + tensor K_for_attn_15_begin_0 = const()[name = string("K_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_15_end_0 = const()[name = string("K_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_15_end_mask_0 = const()[name = string("K_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_15_cast_fp16 = slice_by_index(begin = K_for_attn_15_begin_0, end = K_for_attn_15_end_0, end_mask = K_for_attn_15_end_mask_0, x = var_5360_cast_fp16)[name = string("K_for_attn_15_cast_fp16")]; + tensor 
var_5370_begin_0 = const()[name = string("op_5370_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5370_end_0 = const()[name = string("op_5370_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5370_end_mask_0 = const()[name = string("op_5370_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5370_cast_fp16 = slice_by_index(begin = var_5370_begin_0, end = var_5370_end_0, end_mask = var_5370_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5370_cast_fp16")]; + tensor V_for_attn_15_begin_0 = const()[name = string("V_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_15_end_0 = const()[name = string("V_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_15_end_mask_0 = const()[name = string("V_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_15_cast_fp16 = slice_by_index(begin = V_for_attn_15_begin_0, end = V_for_attn_15_end_0, end_mask = V_for_attn_15_end_mask_0, x = var_5370_cast_fp16)[name = string("V_for_attn_15_cast_fp16")]; + tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_14_reps_0 = const()[name = string("tile_14_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = K_for_attn_15_cast_fp16)[name = string("transpose_93")]; + tensor tile_14_cast_fp16 = tile(reps = tile_14_reps_0, x = transpose_28_cast_fp16)[name = string("tile_14_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_30, x = tile_14_cast_fp16)[name = string("reshape_28_cast_fp16")]; + tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_29_cast_fp16 = transpose(perm = 
transpose_29_perm_0, x = reshape_28_cast_fp16)[name = string("transpose_92")]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_31, x = transpose_29_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_15_reps_0 = const()[name = string("tile_15_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_30_cast_fp16 = transpose(perm = transpose_30_perm_0, x = V_for_attn_15_cast_fp16)[name = string("transpose_91")]; + tensor tile_15_cast_fp16 = tile(reps = tile_15_reps_0, x = transpose_30_cast_fp16)[name = string("tile_15_cast_fp16")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_32, x = tile_15_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_33 = const()[name = string("concat_33"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_31_cast_fp16 = transpose(perm = transpose_31_perm_0, x = reshape_30_cast_fp16)[name = string("transpose_90")]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_33, x = transpose_31_cast_fp16)[name = string("reshape_31_cast_fp16")]; + tensor V_expanded_15_perm_0 = const()[name = string("V_expanded_15_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor transpose_71_cast_fp16 = transpose(perm = transpose_71_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_89")]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = 
attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_95_cast_fp16, y = transpose_71_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_147_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_7")]; + tensor var_5405 = sub(x = x_147_cast_fp16, y = reduce_max_7)[name = string("op_5405")]; + tensor var_5411 = exp(x = var_5405)[name = string("op_5411")]; + tensor var_5421_axes_0 = const()[name = string("op_5421_axes_0"), val = tensor([-1])]; + bool var_5421_keep_dims_0 = const()[name = string("op_5421_keep_dims_0"), val = bool(true)]; + tensor var_5421 = reduce_sum(axes = var_5421_axes_0, keep_dims = var_5421_keep_dims_0, x = var_5411)[name = string("op_5421")]; + tensor var_5427_cast_fp16 = real_div(x = var_5411, y = var_5421)[name = string("op_5427_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor V_expanded_15_cast_fp16 = transpose(perm = V_expanded_15_perm_0, x = reshape_31_cast_fp16)[name = string("transpose_88")]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_5427_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_5438 = const()[name = string("op_5438"), val = tensor([0, 2, 1, 3])]; + tensor var_5445 = const()[name = string("op_5445"), val = tensor([1, 3, -1])]; + tensor var_5439_cast_fp16 = 
transpose(perm = var_5438, x = attn_output_43_cast_fp16)[name = string("transpose_87")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_5445, x = var_5439_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_5450 = const()[name = string("op_5450"), val = tensor([0, 2, 1])]; + string var_5466_pad_type_0 = const()[name = string("op_5466_pad_type_0"), val = string("valid")]; + int32 var_5466_groups_0 = const()[name = string("op_5466_groups_0"), val = int32(1)]; + tensor var_5466_strides_0 = const()[name = string("op_5466_strides_0"), val = tensor([1])]; + tensor var_5466_pad_0 = const()[name = string("op_5466_pad_0"), val = tensor([0, 0])]; + tensor var_5466_dilations_0 = const()[name = string("op_5466_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554704576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557326080))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5451_cast_fp16 = transpose(perm = var_5450, x = attn_output_45_cast_fp16)[name = string("transpose_86")]; + tensor var_5466_cast_fp16 = conv(dilations = var_5466_dilations_0, groups = var_5466_groups_0, pad = var_5466_pad_0, pad_type = var_5466_pad_type_0, strides = var_5466_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5451_cast_fp16)[name = string("op_5466_cast_fp16")]; + tensor var_5470 = const()[name = string("op_5470"), val = tensor([0, 2, 1])]; + int32 var_5476 = const()[name = string("op_5476"), val = int32(-1)]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_151_cast_fp16 = transpose(perm = var_5470, x = var_5466_cast_fp16)[name = string("transpose_85")]; + tensor var_5478_cast_fp16 = mul(x = x_151_cast_fp16, y = const_89_promoted_to_fp16)[name 
= string("op_5478_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_5476, interleave = input_221_interleave_0, values = (x_151_cast_fp16, var_5478_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_5473_to_fp16 = const()[name = string("op_5473_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_5473_to_fp16, x = input_221_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor var_5483_split_sizes_0 = const()[name = string("op_5483_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5483_axis_0 = const()[name = string("op_5483_axis_0"), val = int32(-1)]; + tensor var_5483_cast_fp16_0, tensor var_5483_cast_fp16_1 = split(axis = var_5483_axis_0, split_sizes = var_5483_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_5483_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557328704)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_5483_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_139_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_153_cast_fp16")]; + int32 var_5492 = const()[name = string("op_5492"), val = int32(-1)]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5494_cast_fp16 = mul(x = x_153_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_5494_cast_fp16")]; + bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)]; + 
tensor input_223_cast_fp16 = concat(axis = var_5492, interleave = input_223_interleave_0, values = (x_153_cast_fp16, var_5494_cast_fp16))[name = string("input_223_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_5489_to_fp16 = const()[name = string("op_5489_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_5489_to_fp16, x = input_223_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_5499_split_sizes_0 = const()[name = string("op_5499_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5499_axis_0 = const()[name = string("op_5499_axis_0"), val = int32(-1)]; + tensor var_5499_cast_fp16_0, tensor var_5499_cast_fp16_1 = split(axis = var_5499_axis_0, split_sizes = var_5499_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_5499_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557333888)))]; + tensor h_45_cast_fp16 = mul(x = var_5499_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_5510 = const()[name = string("op_5510"), val = tensor([0, 2, 1])]; + tensor input_225_axes_0 = const()[name = string("input_225_axes_0"), val = tensor([2])]; + tensor var_5511 = transpose(perm = var_5510, x = h_45_cast_fp16)[name = string("transpose_84")]; + tensor input_225 = expand_dims(axes = input_225_axes_0, x = var_5511)[name = string("input_225")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_225)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_225)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_227 = mul(x = gate_31, y = up_15)[name = string("input_227")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = 
mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_227)[name = string("mlp_out_15")]; + tensor var_5551_axes_0 = const()[name = string("op_5551_axes_0"), val = tensor([2])]; + tensor var_5551 = squeeze(axes = var_5551_axes_0, x = mlp_out_15)[name = string("op_5551")]; + tensor var_5555 = const()[name = string("op_5555"), val = tensor([0, 2, 1])]; + int32 var_5561 = const()[name = string("op_5561"), val = int32(-1)]; + fp16 const_91_promoted = const()[name = string("const_91_promoted"), val = fp16(-0x1p+0)]; + tensor x_155 = transpose(perm = var_5555, x = var_5551)[name = string("transpose_83")]; + tensor var_5563 = mul(x = x_155, y = const_91_promoted)[name = string("op_5563")]; + bool input_229_interleave_0 = const()[name = string("input_229_interleave_0"), val = bool(false)]; + tensor input_229 = concat(axis = var_5561, interleave = input_229_interleave_0, values = (x_155, var_5563))[name = string("input_229")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_5558_to_fp16 = const()[name = string("op_5558_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_5558_to_fp16, x = input_229)[name = string("normed_217_cast_fp16")]; + tensor var_5568_split_sizes_0 = const()[name = string("op_5568_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5568_axis_0 = const()[name = string("op_5568_axis_0"), val = int32(-1)]; + tensor var_5568_0, tensor var_5568_1 = split(axis = var_5568_axis_0, split_sizes = var_5568_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_5568")]; + tensor hidden_states_73 = mul(x = var_5568_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_73)[name = 
string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 4864])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 3, 5120])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_5596 = const()[name = string("op_5596"), val = tensor([0, 2, 1])]; + tensor input_231_axes_0 = const()[name = string("input_231_axes_0"), val = tensor([2])]; + tensor var_5597 = transpose(perm = var_5596, x = hidden_states_75_cast_fp16)[name = string("transpose_82")]; + tensor input_231 = expand_dims(axes = input_231_axes_0, x = var_5597)[name = string("input_231")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_231)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor 
var_5616 = const()[name = string("op_5616"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_5617_cast_fp16 = transpose(perm = var_5616, x = per_layer_slice_15_cast_fp16)[name = string("transpose_81")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_5617_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_233_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_233_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557339072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557666816))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_233_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_5633_axes_0 = const()[name = string("op_5633_axes_0"), val = tensor([2])]; + tensor var_5633_cast_fp16 = squeeze(axes = var_5633_axes_0, x = gated_47_cast_fp16)[name = 
string("op_5633_cast_fp16")]; + tensor var_5637 = const()[name = string("op_5637"), val = tensor([0, 2, 1])]; + int32 var_5643 = const()[name = string("op_5643"), val = int32(-1)]; + fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_157_cast_fp16 = transpose(perm = var_5637, x = var_5633_cast_fp16)[name = string("transpose_80")]; + tensor var_5645_cast_fp16 = mul(x = x_157_cast_fp16, y = const_92_promoted_to_fp16)[name = string("op_5645_cast_fp16")]; + bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)]; + tensor input_235_cast_fp16 = concat(axis = var_5643, interleave = input_235_interleave_0, values = (x_157_cast_fp16, var_5645_cast_fp16))[name = string("input_235_cast_fp16")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_5640_to_fp16 = const()[name = string("op_5640_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_5640_to_fp16, x = input_235_cast_fp16)[name = string("normed_221_cast_fp16")]; + tensor var_5650_split_sizes_0 = const()[name = string("op_5650_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5650_axis_0 = const()[name = string("op_5650_axis_0"), val = int32(-1)]; + tensor var_5650_cast_fp16_0, tensor var_5650_cast_fp16_1 = split(axis = var_5650_axis_0, split_sizes = var_5650_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_5650_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557669440)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_5650_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor 
hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = tensor([0x1.06p-1])]; + tensor x_159_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_93_promoted_to_fp16)[name = string("x_159_cast_fp16")]; + int32 var_5665 = const()[name = string("op_5665"), val = int32(-1)]; + fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5667_cast_fp16 = mul(x = x_159_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_5667_cast_fp16")]; + bool input_237_interleave_0 = const()[name = string("input_237_interleave_0"), val = bool(false)]; + tensor input_237_cast_fp16 = concat(axis = var_5665, interleave = input_237_interleave_0, values = (x_159_cast_fp16, var_5667_cast_fp16))[name = string("input_237_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_5662_to_fp16 = const()[name = string("op_5662_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_5662_to_fp16, x = input_237_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor var_5672_split_sizes_0 = const()[name = string("op_5672_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5672_axis_0 = const()[name = string("op_5672_axis_0"), val = int32(-1)]; + tensor var_5672_cast_fp16_0, tensor var_5672_cast_fp16_1 = split(axis = var_5672_axis_0, split_sizes = var_5672_split_sizes_0, x = normed_225_cast_fp16)[name = string("op_5672_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557674624)))]; + tensor h_49_cast_fp16 = mul(x = 
var_5672_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_5678 = const()[name = string("op_5678"), val = tensor([0, 2, 1])]; + tensor var_5681_axes_0 = const()[name = string("op_5681_axes_0"), val = tensor([2])]; + tensor var_5679_cast_fp16 = transpose(perm = var_5678, x = h_49_cast_fp16)[name = string("transpose_79")]; + tensor var_5681_cast_fp16 = expand_dims(axes = var_5681_axes_0, x = var_5679_cast_fp16)[name = string("op_5681_cast_fp16")]; + string q_97_pad_type_0 = const()[name = string("q_97_pad_type_0"), val = string("valid")]; + tensor q_97_strides_0 = const()[name = string("q_97_strides_0"), val = tensor([1, 1])]; + tensor q_97_pad_0 = const()[name = string("q_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_97_dilations_0 = const()[name = string("q_97_dilations_0"), val = tensor([1, 1])]; + int32 q_97_groups_0 = const()[name = string("q_97_groups_0"), val = int32(1)]; + tensor q_97 = conv(dilations = q_97_dilations_0, groups = q_97_groups_0, pad = q_97_pad_0, pad_type = q_97_pad_type_0, strides = q_97_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_5681_cast_fp16)[name = string("q_97")]; + tensor var_5702 = const()[name = string("op_5702"), val = tensor([1, 8, 256, 3])]; + tensor var_5703 = reshape(shape = var_5702, x = q_97)[name = string("op_5703")]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_5726 = const()[name = string("op_5726"), val = tensor([3, 8, 256])]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = var_5703)[name = string("transpose_78")]; + tensor x_161 = reshape(shape = var_5726, x = transpose_72)[name = string("x_161")]; + int32 var_5732 = const()[name = string("op_5732"), val = int32(-1)]; + fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; + tensor var_5734 = mul(x = x_161, y = const_95_promoted)[name = 
string("op_5734")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241 = concat(axis = var_5732, interleave = input_241_interleave_0, values = (x_161, var_5734))[name = string("input_241")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_5729_to_fp16 = const()[name = string("op_5729_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_5729_to_fp16, x = input_241)[name = string("normed_229_cast_fp16")]; + tensor var_5739_split_sizes_0 = const()[name = string("op_5739_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5739_axis_0 = const()[name = string("op_5739_axis_0"), val = int32(-1)]; + tensor var_5739_0, tensor var_5739_1 = split(axis = var_5739_axis_0, split_sizes = var_5739_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_5739")]; + tensor var_5746 = const()[name = string("op_5746"), val = tensor([1, 3, 8, 256])]; + tensor var_5747 = reshape(shape = var_5746, x = var_5739_0)[name = string("op_5747")]; + tensor var_5752 = const()[name = string("op_5752"), val = tensor([0, 2, 1, 3])]; + tensor q_103 = transpose(perm = var_5752, x = var_5747)[name = string("transpose_77")]; + tensor var_5754_cast_fp16 = mul(x = q_103, y = cos_s)[name = string("op_5754_cast_fp16")]; + tensor var_5755_split_sizes_0 = const()[name = string("op_5755_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5755_axis_0 = const()[name = string("op_5755_axis_0"), val = int32(-1)]; + tensor var_5755_0, tensor var_5755_1 = split(axis = var_5755_axis_0, split_sizes = var_5755_split_sizes_0, x = q_103)[name = string("op_5755")]; + fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; + tensor var_5757 = mul(x = var_5755_1, y = const_96_promoted)[name = string("op_5757")]; + int32 var_5759 = const()[name = string("op_5759"), val = int32(-1)]; + bool 
var_5760_interleave_0 = const()[name = string("op_5760_interleave_0"), val = bool(false)]; + tensor var_5760 = concat(axis = var_5759, interleave = var_5760_interleave_0, values = (var_5757, var_5755_0))[name = string("op_5760")]; + tensor var_5761_cast_fp16 = mul(x = var_5760, y = sin_s)[name = string("op_5761_cast_fp16")]; + tensor q_107_cast_fp16 = add(x = var_5754_cast_fp16, y = var_5761_cast_fp16)[name = string("q_107_cast_fp16")]; + string k_51_pad_type_0 = const()[name = string("k_51_pad_type_0"), val = string("valid")]; + tensor k_51_strides_0 = const()[name = string("k_51_strides_0"), val = tensor([1, 1])]; + tensor k_51_pad_0 = const()[name = string("k_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_51_dilations_0 = const()[name = string("k_51_dilations_0"), val = tensor([1, 1])]; + int32 k_51_groups_0 = const()[name = string("k_51_groups_0"), val = int32(1)]; + tensor k_51 = conv(dilations = k_51_dilations_0, groups = k_51_groups_0, pad = k_51_pad_0, pad_type = k_51_pad_type_0, strides = k_51_strides_0, weight = layers_8_self_attn_k_proj_weight_palettized, x = var_5681_cast_fp16)[name = string("k_51")]; + tensor var_5779 = const()[name = string("op_5779"), val = tensor([1, 2, 256, 3])]; + tensor var_5780 = reshape(shape = var_5779, x = k_51)[name = string("op_5780")]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_19_pad_type_0 = const()[name = string("v_19_pad_type_0"), val = string("valid")]; + tensor v_19_strides_0 = const()[name = string("v_19_strides_0"), val = tensor([1, 1])]; + tensor v_19_pad_0 = const()[name = string("v_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_19_dilations_0 = const()[name = string("v_19_dilations_0"), val = tensor([1, 1])]; + int32 v_19_groups_0 = const()[name = string("v_19_groups_0"), val = int32(1)]; + tensor v_19 = conv(dilations = v_19_dilations_0, groups = v_19_groups_0, pad = v_19_pad_0, pad_type = v_19_pad_type_0, strides = 
v_19_strides_0, weight = layers_8_self_attn_v_proj_weight_palettized, x = var_5681_cast_fp16)[name = string("v_19")]; + tensor var_5807 = const()[name = string("op_5807"), val = tensor([1, 2, 256, 3])]; + tensor var_5808 = reshape(shape = var_5807, x = v_19)[name = string("op_5808")]; + tensor var_5813 = const()[name = string("op_5813"), val = tensor([0, 1, 3, 2])]; + tensor var_5831 = const()[name = string("op_5831"), val = tensor([3, 2, 256])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = var_5780)[name = string("transpose_76")]; + tensor x_163 = reshape(shape = var_5831, x = transpose_73)[name = string("x_163")]; + int32 var_5837 = const()[name = string("op_5837"), val = int32(-1)]; + fp16 const_97_promoted = const()[name = string("const_97_promoted"), val = fp16(-0x1p+0)]; + tensor var_5839 = mul(x = x_163, y = const_97_promoted)[name = string("op_5839")]; + bool input_243_interleave_0 = const()[name = string("input_243_interleave_0"), val = bool(false)]; + tensor input_243 = concat(axis = var_5837, interleave = input_243_interleave_0, values = (x_163, var_5839))[name = string("input_243")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_5834_to_fp16 = const()[name = string("op_5834_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_5834_to_fp16, x = input_243)[name = string("normed_233_cast_fp16")]; + tensor var_5844_split_sizes_0 = const()[name = string("op_5844_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5844_axis_0 = const()[name = string("op_5844_axis_0"), val = int32(-1)]; + tensor var_5844_0, tensor var_5844_1 = split(axis = var_5844_axis_0, split_sizes = var_5844_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_5844")]; + tensor k_55 = mul(x = var_5844_0, y = layers_8_self_attn_k_norm_weight)[name = string("k_55")]; + tensor var_5851 = const()[name = string("op_5851"), val = tensor([1, 3, 
2, 256])]; + tensor var_5852 = reshape(shape = var_5851, x = k_55)[name = string("op_5852")]; + tensor var_5857 = const()[name = string("op_5857"), val = tensor([0, 2, 1, 3])]; + fp16 var_5859_promoted = const()[name = string("op_5859_promoted"), val = fp16(0x1p+1)]; + tensor var_5814 = transpose(perm = var_5813, x = var_5808)[name = string("transpose_75")]; + tensor var_5860 = pow(x = var_5814, y = var_5859_promoted)[name = string("op_5860")]; + tensor var_5865_axes_0 = const()[name = string("op_5865_axes_0"), val = tensor([-1])]; + bool var_5865_keep_dims_0 = const()[name = string("op_5865_keep_dims_0"), val = bool(true)]; + tensor var_5865 = reduce_mean(axes = var_5865_axes_0, keep_dims = var_5865_keep_dims_0, x = var_5860)[name = string("op_5865")]; + fp16 var_5867_to_fp16 = const()[name = string("op_5867_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_17_cast_fp16 = add(x = var_5865, y = var_5867_to_fp16)[name = string("mean_sq_17_cast_fp16")]; + fp32 var_5869_epsilon_0 = const()[name = string("op_5869_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5869_cast_fp16 = rsqrt(epsilon = var_5869_epsilon_0, x = mean_sq_17_cast_fp16)[name = string("op_5869_cast_fp16")]; + tensor input_247_cast_fp16 = mul(x = var_5814, y = var_5869_cast_fp16)[name = string("input_247_cast_fp16")]; + tensor q_105 = transpose(perm = var_5857, x = var_5852)[name = string("transpose_74")]; + tensor var_5871_cast_fp16 = mul(x = q_105, y = cos_s)[name = string("op_5871_cast_fp16")]; + tensor var_5872_split_sizes_0 = const()[name = string("op_5872_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5872_axis_0 = const()[name = string("op_5872_axis_0"), val = int32(-1)]; + tensor var_5872_0, tensor var_5872_1 = split(axis = var_5872_axis_0, split_sizes = var_5872_split_sizes_0, x = q_105)[name = string("op_5872")]; + fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; + tensor var_5874 = mul(x = var_5872_1, y = const_98_promoted)[name = 
string("op_5874")]; + int32 var_5876 = const()[name = string("op_5876"), val = int32(-1)]; + bool var_5877_interleave_0 = const()[name = string("op_5877_interleave_0"), val = bool(false)]; + tensor var_5877 = concat(axis = var_5876, interleave = var_5877_interleave_0, values = (var_5874, var_5872_0))[name = string("op_5877")]; + tensor var_5878_cast_fp16 = mul(x = var_5877, y = sin_s)[name = string("op_5878_cast_fp16")]; + tensor input_245_cast_fp16 = add(x = var_5871_cast_fp16, y = var_5878_cast_fp16)[name = string("input_245_cast_fp16")]; + tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; + fp16 const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_15_cast_fp16 = pad(constant_val = const_99_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_245_cast_fp16)[name = string("k_padded_15_cast_fp16")]; + tensor v_padded_15_pad_0 = const()[name = string("v_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; + fp16 const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_15_cast_fp16 = pad(constant_val = const_100_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = input_247_cast_fp16)[name = string("v_padded_15_cast_fp16")]; + tensor slot_k_17_begin_0 = const()[name = string("slot_k_17_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor slot_k_17_end_0 = const()[name = string("slot_k_17_end_0"), val = tensor([8, 2, 512, 512])]; + tensor slot_k_17_end_mask_0 = const()[name = string("slot_k_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_17_cast_fp16 = slice_by_index(begin = slot_k_17_begin_0, end = slot_k_17_end_0, end_mask = slot_k_17_end_mask_0, x = 
K_sliding_out_13_cast_fp16)[name = string("slot_k_17_cast_fp16")]; + tensor slot_v_17_begin_0 = const()[name = string("slot_v_17_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor slot_v_17_end_0 = const()[name = string("slot_v_17_end_0"), val = tensor([8, 2, 512, 512])]; + tensor slot_v_17_end_mask_0 = const()[name = string("slot_v_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_17_cast_fp16 = slice_by_index(begin = slot_v_17_begin_0, end = slot_v_17_end_0, end_mask = slot_v_17_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("slot_v_17_cast_fp16")]; + tensor var_5917_begin_0 = const()[name = string("op_5917_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5917_end_0 = const()[name = string("op_5917_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5917_end_mask_0 = const()[name = string("op_5917_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5917_cast_fp16 = slice_by_index(begin = var_5917_begin_0, end = var_5917_end_0, end_mask = var_5917_end_mask_0, x = slot_k_17_cast_fp16)[name = string("op_5917_cast_fp16")]; + int32 var_5924 = const()[name = string("op_5924"), val = int32(2)]; + bool new_k_17_interleave_0 = const()[name = string("new_k_17_interleave_0"), val = bool(false)]; + tensor new_k_17_cast_fp16 = concat(axis = var_5924, interleave = new_k_17_interleave_0, values = (var_5917_cast_fp16, k_padded_15_cast_fp16))[name = string("new_k_17_cast_fp16")]; + tensor var_5940_begin_0 = const()[name = string("op_5940_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_5940_end_0 = const()[name = string("op_5940_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5940_end_mask_0 = const()[name = string("op_5940_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5940_cast_fp16 = slice_by_index(begin = var_5940_begin_0, end = var_5940_end_0, end_mask = var_5940_end_mask_0, x = slot_v_17_cast_fp16)[name = string("op_5940_cast_fp16")]; + int32 var_5947 = const()[name = string("op_5947"), 
val = int32(2)]; + bool new_v_17_interleave_0 = const()[name = string("new_v_17_interleave_0"), val = bool(false)]; + tensor new_v_17_cast_fp16 = concat(axis = var_5947, interleave = new_v_17_interleave_0, values = (var_5940_cast_fp16, v_padded_15_cast_fp16))[name = string("new_v_17_cast_fp16")]; + tensor var_5953_begin_0 = const()[name = string("op_5953_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5953_end_0 = const()[name = string("op_5953_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5953_end_mask_0 = const()[name = string("op_5953_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5953_cast_fp16 = slice_by_index(begin = var_5953_begin_0, end = var_5953_end_0, end_mask = var_5953_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("op_5953_cast_fp16")]; + tensor var_5958_begin_0 = const()[name = string("op_5958_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5958_end_0 = const()[name = string("op_5958_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_5958_end_mask_0 = const()[name = string("op_5958_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5958_cast_fp16 = slice_by_index(begin = var_5958_begin_0, end = var_5958_end_0, end_mask = var_5958_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("op_5958_cast_fp16")]; + int32 var_5960 = const()[name = string("op_5960"), val = int32(0)]; + bool K_sliding_out_15_interleave_0 = const()[name = string("K_sliding_out_15_interleave_0"), val = bool(false)]; + tensor K_sliding_out_15_cast_fp16 = concat(axis = var_5960, interleave = K_sliding_out_15_interleave_0, values = (var_5953_cast_fp16, new_k_17_cast_fp16, var_5958_cast_fp16))[name = string("K_sliding_out_15_cast_fp16")]; + tensor var_5966_begin_0 = const()[name = string("op_5966_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5966_end_0 = const()[name = string("op_5966_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5966_end_mask_0 = const()[name = string("op_5966_end_mask_0"), 
val = tensor([false, true, true, true])]; + tensor var_5966_cast_fp16 = slice_by_index(begin = var_5966_begin_0, end = var_5966_end_0, end_mask = var_5966_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5966_cast_fp16")]; + tensor var_5971_begin_0 = const()[name = string("op_5971_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5971_end_0 = const()[name = string("op_5971_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_5971_end_mask_0 = const()[name = string("op_5971_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5971_cast_fp16 = slice_by_index(begin = var_5971_begin_0, end = var_5971_end_0, end_mask = var_5971_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5971_cast_fp16")]; + int32 var_5973 = const()[name = string("op_5973"), val = int32(0)]; + bool V_sliding_out_15_interleave_0 = const()[name = string("V_sliding_out_15_interleave_0"), val = bool(false)]; + tensor V_sliding_out_15_cast_fp16 = concat(axis = var_5973, interleave = V_sliding_out_15_interleave_0, values = (var_5966_cast_fp16, new_v_17_cast_fp16, var_5971_cast_fp16))[name = string("V_sliding_out_15_cast_fp16")]; + tensor var_5979_begin_0 = const()[name = string("op_5979_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5979_end_0 = const()[name = string("op_5979_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5979_end_mask_0 = const()[name = string("op_5979_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5979_cast_fp16 = slice_by_index(begin = var_5979_begin_0, end = var_5979_end_0, end_mask = var_5979_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("op_5979_cast_fp16")]; + tensor K_for_attn_17_begin_0 = const()[name = string("K_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_17_end_0 = const()[name = string("K_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_17_end_mask_0 = const()[name = string("K_for_attn_17_end_mask_0"), val = tensor([true, true, 
true, false])]; + tensor K_for_attn_17_cast_fp16 = slice_by_index(begin = K_for_attn_17_begin_0, end = K_for_attn_17_end_0, end_mask = K_for_attn_17_end_mask_0, x = var_5979_cast_fp16)[name = string("K_for_attn_17_cast_fp16")]; + tensor var_5989_begin_0 = const()[name = string("op_5989_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5989_end_0 = const()[name = string("op_5989_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5989_end_mask_0 = const()[name = string("op_5989_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5989_cast_fp16 = slice_by_index(begin = var_5989_begin_0, end = var_5989_end_0, end_mask = var_5989_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("op_5989_cast_fp16")]; + tensor V_for_attn_17_begin_0 = const()[name = string("V_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_17_end_0 = const()[name = string("V_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_17_end_mask_0 = const()[name = string("V_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_17_cast_fp16 = slice_by_index(begin = V_for_attn_17_begin_0, end = V_for_attn_17_end_0, end_mask = V_for_attn_17_end_mask_0, x = var_5989_cast_fp16)[name = string("V_for_attn_17_cast_fp16")]; + tensor transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_16_reps_0 = const()[name = string("tile_16_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_32_cast_fp16 = transpose(perm = transpose_32_perm_0, x = K_for_attn_17_cast_fp16)[name = string("transpose_73")]; + tensor tile_16_cast_fp16 = tile(reps = tile_16_reps_0, x = transpose_32_cast_fp16)[name = string("tile_16_cast_fp16")]; + tensor concat_34 = const()[name = string("concat_34"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_34, x = tile_16_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor transpose_33_perm_0 
= const()[name = string("transpose_33_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_72")]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_35, x = transpose_33_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_17_reps_0 = const()[name = string("tile_17_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_34_cast_fp16 = transpose(perm = transpose_34_perm_0, x = V_for_attn_17_cast_fp16)[name = string("transpose_71")]; + tensor tile_17_cast_fp16 = tile(reps = tile_17_reps_0, x = transpose_34_cast_fp16)[name = string("tile_17_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_36, x = tile_17_cast_fp16)[name = string("reshape_34_cast_fp16")]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_35_cast_fp16 = transpose(perm = transpose_35_perm_0, x = reshape_34_cast_fp16)[name = string("transpose_70")]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_37, x = transpose_35_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor V_expanded_17_perm_0 = const()[name = string("V_expanded_17_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = 
bool(false)]; + tensor transpose_74_cast_fp16 = transpose(perm = transpose_74_perm_0, x = reshape_33_cast_fp16)[name = string("transpose_69")]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_107_cast_fp16, y = transpose_74_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_167_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_167_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_167_cast_fp16)[name = string("reduce_max_8")]; + tensor var_6024 = sub(x = x_167_cast_fp16, y = reduce_max_8)[name = string("op_6024")]; + tensor var_6030 = exp(x = var_6024)[name = string("op_6030")]; + tensor var_6040_axes_0 = const()[name = string("op_6040_axes_0"), val = tensor([-1])]; + bool var_6040_keep_dims_0 = const()[name = string("op_6040_keep_dims_0"), val = bool(true)]; + tensor var_6040 = reduce_sum(axes = var_6040_axes_0, keep_dims = var_6040_keep_dims_0, x = var_6030)[name = string("op_6040")]; + tensor var_6046_cast_fp16 = real_div(x = var_6030, y = var_6040)[name = string("op_6046_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor V_expanded_17_cast_fp16 = transpose(perm = V_expanded_17_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_68")]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_6046_cast_fp16, y = V_expanded_17_cast_fp16)[name = 
string("attn_output_49_cast_fp16")]; + tensor var_6057 = const()[name = string("op_6057"), val = tensor([0, 2, 1, 3])]; + tensor var_6064 = const()[name = string("op_6064"), val = tensor([1, 3, -1])]; + tensor var_6058_cast_fp16 = transpose(perm = var_6057, x = attn_output_49_cast_fp16)[name = string("transpose_67")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_6064, x = var_6058_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_6069 = const()[name = string("op_6069"), val = tensor([0, 2, 1])]; + string var_6085_pad_type_0 = const()[name = string("op_6085_pad_type_0"), val = string("valid")]; + int32 var_6085_groups_0 = const()[name = string("op_6085_groups_0"), val = int32(1)]; + tensor var_6085_strides_0 = const()[name = string("op_6085_strides_0"), val = tensor([1])]; + tensor var_6085_pad_0 = const()[name = string("op_6085_pad_0"), val = tensor([0, 0])]; + tensor var_6085_dilations_0 = const()[name = string("op_6085_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557679808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560301312))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6070_cast_fp16 = transpose(perm = var_6069, x = attn_output_51_cast_fp16)[name = string("transpose_66")]; + tensor var_6085_cast_fp16 = conv(dilations = var_6085_dilations_0, groups = var_6085_groups_0, pad = var_6085_pad_0, pad_type = var_6085_pad_type_0, strides = var_6085_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_6070_cast_fp16)[name = string("op_6085_cast_fp16")]; + tensor var_6089 = const()[name = string("op_6089"), val = tensor([0, 2, 1])]; + int32 var_6095 = const()[name = string("op_6095"), val = int32(-1)]; + fp16 const_101_promoted_to_fp16 = const()[name = 
string("const_101_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_171_cast_fp16 = transpose(perm = var_6089, x = var_6085_cast_fp16)[name = string("transpose_65")]; + tensor var_6097_cast_fp16 = mul(x = x_171_cast_fp16, y = const_101_promoted_to_fp16)[name = string("op_6097_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_6095, interleave = input_251_interleave_0, values = (x_171_cast_fp16, var_6097_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_6092_to_fp16 = const()[name = string("op_6092_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_6092_to_fp16, x = input_251_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor var_6102_split_sizes_0 = const()[name = string("op_6102_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6102_axis_0 = const()[name = string("op_6102_axis_0"), val = int32(-1)]; + tensor var_6102_cast_fp16_0, tensor var_6102_cast_fp16_1 = split(axis = var_6102_axis_0, split_sizes = var_6102_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_6102_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560303936)))]; + tensor attn_output_53_cast_fp16 = mul(x = var_6102_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor x_173_cast_fp16 = add(x = x_159_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_173_cast_fp16")]; + int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = 
string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6113_cast_fp16 = mul(x = x_173_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_6113_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_6111, interleave = input_253_interleave_0, values = (x_173_cast_fp16, var_6113_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_6108_to_fp16 = const()[name = string("op_6108_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_6108_to_fp16, x = input_253_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor var_6118_split_sizes_0 = const()[name = string("op_6118_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6118_axis_0 = const()[name = string("op_6118_axis_0"), val = int32(-1)]; + tensor var_6118_cast_fp16_0, tensor var_6118_cast_fp16_1 = split(axis = var_6118_axis_0, split_sizes = var_6118_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_6118_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560309120)))]; + tensor h_51_cast_fp16 = mul(x = var_6118_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_6129 = const()[name = string("op_6129"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_6130 = transpose(perm = var_6129, x = h_51_cast_fp16)[name = string("transpose_64")]; + tensor input_255 = expand_dims(axes = input_255_axes_0, x = var_6130)[name = string("input_255")]; + string 
gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_255)[name = string("gate_33")]; + string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; + tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; + tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; + int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; + tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_255)[name = string("up_17")]; + string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; + tensor input_257 = mul(x = gate_35, y = up_17)[name = string("input_257")]; + string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; + tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; + tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_257)[name = string("mlp_out_17")]; + tensor var_6170_axes_0 = const()[name = string("op_6170_axes_0"), val = tensor([2])]; + tensor var_6170 = squeeze(axes = var_6170_axes_0, x = mlp_out_17)[name = string("op_6170")]; + tensor var_6174 = const()[name = string("op_6174"), val = tensor([0, 2, 1])]; + int32 var_6180 = const()[name = string("op_6180"), val = int32(-1)]; + fp16 const_103_promoted = const()[name = string("const_103_promoted"), val = fp16(-0x1p+0)]; + tensor x_175 = transpose(perm = var_6174, x = var_6170)[name = string("transpose_63")]; + tensor var_6182 = mul(x = x_175, y = const_103_promoted)[name = string("op_6182")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259 = concat(axis = var_6180, interleave = input_259_interleave_0, values = (x_175, var_6182))[name = string("input_259")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_6177_to_fp16 = const()[name = string("op_6177_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_6177_to_fp16, x = input_259)[name = string("normed_245_cast_fp16")]; + tensor var_6187_split_sizes_0 = const()[name = string("op_6187_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6187_axis_0 = const()[name = string("op_6187_axis_0"), val = int32(-1)]; + tensor var_6187_0, tensor var_6187_1 = split(axis = var_6187_axis_0, split_sizes = var_6187_split_sizes_0, x = normed_245_cast_fp16)[name = 
string("op_6187")]; + tensor hidden_states_83 = mul(x = var_6187_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_173_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor([0, 0, 5120])]; + tensor per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor([1, 3, 5376])]; + tensor per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_17_cast_fp16 = slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17_cast_fp16")]; + tensor var_6215 = const()[name = string("op_6215"), val = tensor([0, 2, 1])]; + tensor input_261_axes_0 = const()[name = string("input_261_axes_0"), val = tensor([2])]; + tensor var_6216 = transpose(perm = var_6215, x = hidden_states_85_cast_fp16)[name = string("transpose_62")]; + tensor input_261 = expand_dims(axes = input_261_axes_0, x = var_6216)[name = string("input_261")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = 
input_261)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_6235 = const()[name = string("op_6235"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor([2])]; + tensor var_6236_cast_fp16 = transpose(perm = var_6235, x = per_layer_slice_17_cast_fp16)[name = string("transpose_61")]; + tensor per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_6236_cast_fp16)[name = string("per_layer_slice_conv_17_cast_fp16")]; + tensor input_263_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_263_cast_fp16")]; + string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")]; + tensor gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor([1, 1])]; + tensor gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor([1, 1])]; + int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560314304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560642048))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = 
input_263_cast_fp16)[name = string("gated_53_cast_fp16")]; + tensor var_6252_axes_0 = const()[name = string("op_6252_axes_0"), val = tensor([2])]; + tensor var_6252_cast_fp16 = squeeze(axes = var_6252_axes_0, x = gated_53_cast_fp16)[name = string("op_6252_cast_fp16")]; + tensor var_6256 = const()[name = string("op_6256"), val = tensor([0, 2, 1])]; + int32 var_6262 = const()[name = string("op_6262"), val = int32(-1)]; + fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_177_cast_fp16 = transpose(perm = var_6256, x = var_6252_cast_fp16)[name = string("transpose_60")]; + tensor var_6264_cast_fp16 = mul(x = x_177_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_6264_cast_fp16")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265_cast_fp16 = concat(axis = var_6262, interleave = input_265_interleave_0, values = (x_177_cast_fp16, var_6264_cast_fp16))[name = string("input_265_cast_fp16")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_6259_to_fp16 = const()[name = string("op_6259_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_6259_to_fp16, x = input_265_cast_fp16)[name = string("normed_249_cast_fp16")]; + tensor var_6269_split_sizes_0 = const()[name = string("op_6269_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6269_axis_0 = const()[name = string("op_6269_axis_0"), val = int32(-1)]; + tensor var_6269_cast_fp16_0, tensor var_6269_cast_fp16_1 = split(axis = var_6269_axis_0, split_sizes = var_6269_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_6269_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(560644672)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_6269_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; + tensor const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = tensor([0x1.bap-2])]; + tensor x_179_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_105_promoted_to_fp16)[name = string("x_179_cast_fp16")]; + int32 var_6284 = const()[name = string("op_6284"), val = int32(-1)]; + fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6286_cast_fp16 = mul(x = x_179_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_6286_cast_fp16")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267_cast_fp16 = concat(axis = var_6284, interleave = input_267_interleave_0, values = (x_179_cast_fp16, var_6286_cast_fp16))[name = string("input_267_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_6281_to_fp16 = const()[name = string("op_6281_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_6281_to_fp16, x = input_267_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor var_6291_split_sizes_0 = const()[name = string("op_6291_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6291_axis_0 = const()[name = string("op_6291_axis_0"), val = int32(-1)]; + tensor var_6291_cast_fp16_0, tensor var_6291_cast_fp16_1 = split(axis = var_6291_axis_0, split_sizes = var_6291_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_6291_cast_fp16")]; + tensor 
layers_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560649856)))]; + tensor h_55_cast_fp16 = mul(x = var_6291_cast_fp16_0, y = layers_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")]; + tensor var_6297 = const()[name = string("op_6297"), val = tensor([0, 2, 1])]; + tensor var_6300_axes_0 = const()[name = string("op_6300_axes_0"), val = tensor([2])]; + tensor var_6298_cast_fp16 = transpose(perm = var_6297, x = h_55_cast_fp16)[name = string("transpose_59")]; + tensor var_6300_cast_fp16 = expand_dims(axes = var_6300_axes_0, x = var_6298_cast_fp16)[name = string("op_6300_cast_fp16")]; + string q_109_pad_type_0 = const()[name = string("q_109_pad_type_0"), val = string("valid")]; + tensor q_109_strides_0 = const()[name = string("q_109_strides_0"), val = tensor([1, 1])]; + tensor q_109_pad_0 = const()[name = string("q_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_109_dilations_0 = const()[name = string("q_109_dilations_0"), val = tensor([1, 1])]; + int32 q_109_groups_0 = const()[name = string("q_109_groups_0"), val = int32(1)]; + tensor q_109 = conv(dilations = q_109_dilations_0, groups = q_109_groups_0, pad = q_109_pad_0, pad_type = q_109_pad_type_0, strides = q_109_strides_0, weight = layers_9_self_attn_q_proj_weight_palettized, x = var_6300_cast_fp16)[name = string("q_109")]; + tensor var_6321 = const()[name = string("op_6321"), val = tensor([1, 8, 256, 3])]; + tensor var_6322 = reshape(shape = var_6321, x = q_109)[name = string("op_6322")]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_6345 = const()[name = string("op_6345"), val = tensor([3, 8, 256])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = var_6322)[name = string("transpose_58")]; + tensor x_181 = reshape(shape = var_6345, x = 
transpose_75)[name = string("x_181")]; + int32 var_6351 = const()[name = string("op_6351"), val = int32(-1)]; + fp16 const_107_promoted = const()[name = string("const_107_promoted"), val = fp16(-0x1p+0)]; + tensor var_6353 = mul(x = x_181, y = const_107_promoted)[name = string("op_6353")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271 = concat(axis = var_6351, interleave = input_271_interleave_0, values = (x_181, var_6353))[name = string("input_271")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_6348_to_fp16 = const()[name = string("op_6348_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_6348_to_fp16, x = input_271)[name = string("normed_257_cast_fp16")]; + tensor var_6358_split_sizes_0 = const()[name = string("op_6358_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6358_axis_0 = const()[name = string("op_6358_axis_0"), val = int32(-1)]; + tensor var_6358_0, tensor var_6358_1 = split(axis = var_6358_axis_0, split_sizes = var_6358_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_6358")]; + tensor q_113 = mul(x = var_6358_0, y = layers_9_self_attn_q_norm_weight)[name = string("q_113")]; + tensor var_6365 = const()[name = string("op_6365"), val = tensor([1, 3, 8, 256])]; + tensor var_6366 = reshape(shape = var_6365, x = q_113)[name = string("op_6366")]; + tensor var_6371 = const()[name = string("op_6371"), val = tensor([0, 2, 1, 3])]; + tensor q_115 = transpose(perm = var_6371, x = var_6366)[name = string("transpose_57")]; + tensor var_6373_cast_fp16 = mul(x = q_115, y = cos_s)[name = string("op_6373_cast_fp16")]; + tensor var_6374_split_sizes_0 = const()[name = string("op_6374_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6374_axis_0 = const()[name = string("op_6374_axis_0"), val = int32(-1)]; + tensor var_6374_0, tensor var_6374_1 = 
split(axis = var_6374_axis_0, split_sizes = var_6374_split_sizes_0, x = q_115)[name = string("op_6374")]; + fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)]; + tensor var_6376 = mul(x = var_6374_1, y = const_108_promoted)[name = string("op_6376")]; + int32 var_6378 = const()[name = string("op_6378"), val = int32(-1)]; + bool var_6379_interleave_0 = const()[name = string("op_6379_interleave_0"), val = bool(false)]; + tensor var_6379 = concat(axis = var_6378, interleave = var_6379_interleave_0, values = (var_6376, var_6374_0))[name = string("op_6379")]; + tensor var_6380_cast_fp16 = mul(x = var_6379, y = sin_s)[name = string("op_6380_cast_fp16")]; + tensor q_119_cast_fp16 = add(x = var_6373_cast_fp16, y = var_6380_cast_fp16)[name = string("q_119_cast_fp16")]; + string k_57_pad_type_0 = const()[name = string("k_57_pad_type_0"), val = string("valid")]; + tensor k_57_strides_0 = const()[name = string("k_57_strides_0"), val = tensor([1, 1])]; + tensor k_57_pad_0 = const()[name = string("k_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_57_dilations_0 = const()[name = string("k_57_dilations_0"), val = tensor([1, 1])]; + int32 k_57_groups_0 = const()[name = string("k_57_groups_0"), val = int32(1)]; + tensor k_57 = conv(dilations = k_57_dilations_0, groups = k_57_groups_0, pad = k_57_pad_0, pad_type = k_57_pad_type_0, strides = k_57_strides_0, weight = layers_9_self_attn_k_proj_weight_palettized, x = var_6300_cast_fp16)[name = string("k_57")]; + tensor var_6398 = const()[name = string("op_6398"), val = tensor([1, 2, 256, 3])]; + tensor var_6399 = reshape(shape = var_6398, x = k_57)[name = string("op_6399")]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_21_pad_type_0 = const()[name = string("v_21_pad_type_0"), val = string("valid")]; + tensor v_21_strides_0 = const()[name = string("v_21_strides_0"), val = tensor([1, 1])]; + tensor v_21_pad_0 = const()[name = 
string("v_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_21_dilations_0 = const()[name = string("v_21_dilations_0"), val = tensor([1, 1])]; + int32 v_21_groups_0 = const()[name = string("v_21_groups_0"), val = int32(1)]; + tensor v_21 = conv(dilations = v_21_dilations_0, groups = v_21_groups_0, pad = v_21_pad_0, pad_type = v_21_pad_type_0, strides = v_21_strides_0, weight = layers_9_self_attn_v_proj_weight_palettized, x = var_6300_cast_fp16)[name = string("v_21")]; + tensor var_6426 = const()[name = string("op_6426"), val = tensor([1, 2, 256, 3])]; + tensor var_6427 = reshape(shape = var_6426, x = v_21)[name = string("op_6427")]; + tensor var_6432 = const()[name = string("op_6432"), val = tensor([0, 1, 3, 2])]; + tensor var_6450 = const()[name = string("op_6450"), val = tensor([3, 2, 256])]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = var_6399)[name = string("transpose_56")]; + tensor x_183 = reshape(shape = var_6450, x = transpose_76)[name = string("x_183")]; + int32 var_6456 = const()[name = string("op_6456"), val = int32(-1)]; + fp16 const_109_promoted = const()[name = string("const_109_promoted"), val = fp16(-0x1p+0)]; + tensor var_6458 = mul(x = x_183, y = const_109_promoted)[name = string("op_6458")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273 = concat(axis = var_6456, interleave = input_273_interleave_0, values = (x_183, var_6458))[name = string("input_273")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_6453_to_fp16 = const()[name = string("op_6453_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_6453_to_fp16, x = input_273)[name = string("normed_261_cast_fp16")]; + tensor var_6463_split_sizes_0 = const()[name = string("op_6463_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6463_axis_0 = const()[name = 
string("op_6463_axis_0"), val = int32(-1)]; + tensor var_6463_0, tensor var_6463_1 = split(axis = var_6463_axis_0, split_sizes = var_6463_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_6463")]; + tensor k_61 = mul(x = var_6463_0, y = layers_9_self_attn_k_norm_weight)[name = string("k_61")]; + tensor var_6470 = const()[name = string("op_6470"), val = tensor([1, 3, 2, 256])]; + tensor var_6471 = reshape(shape = var_6470, x = k_61)[name = string("op_6471")]; + tensor var_6476 = const()[name = string("op_6476"), val = tensor([0, 2, 1, 3])]; + fp16 var_6478_promoted = const()[name = string("op_6478_promoted"), val = fp16(0x1p+1)]; + tensor var_6433 = transpose(perm = var_6432, x = var_6427)[name = string("transpose_55")]; + tensor var_6479 = pow(x = var_6433, y = var_6478_promoted)[name = string("op_6479")]; + tensor var_6484_axes_0 = const()[name = string("op_6484_axes_0"), val = tensor([-1])]; + bool var_6484_keep_dims_0 = const()[name = string("op_6484_keep_dims_0"), val = bool(true)]; + tensor var_6484 = reduce_mean(axes = var_6484_axes_0, keep_dims = var_6484_keep_dims_0, x = var_6479)[name = string("op_6484")]; + fp16 var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_19_cast_fp16 = add(x = var_6484, y = var_6486_to_fp16)[name = string("mean_sq_19_cast_fp16")]; + fp32 var_6488_epsilon_0 = const()[name = string("op_6488_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6488_cast_fp16 = rsqrt(epsilon = var_6488_epsilon_0, x = mean_sq_19_cast_fp16)[name = string("op_6488_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_6433, y = var_6488_cast_fp16)[name = string("input_277_cast_fp16")]; + tensor q_117 = transpose(perm = var_6476, x = var_6471)[name = string("transpose_54")]; + tensor var_6490_cast_fp16 = mul(x = q_117, y = cos_s)[name = string("op_6490_cast_fp16")]; + tensor var_6491_split_sizes_0 = const()[name = string("op_6491_split_sizes_0"), val = tensor([128, 128])]; + int32 
var_6491_axis_0 = const()[name = string("op_6491_axis_0"), val = int32(-1)]; + tensor var_6491_0, tensor var_6491_1 = split(axis = var_6491_axis_0, split_sizes = var_6491_split_sizes_0, x = q_117)[name = string("op_6491")]; + fp16 const_110_promoted = const()[name = string("const_110_promoted"), val = fp16(-0x1p+0)]; + tensor var_6493 = mul(x = var_6491_1, y = const_110_promoted)[name = string("op_6493")]; + int32 var_6495 = const()[name = string("op_6495"), val = int32(-1)]; + bool var_6496_interleave_0 = const()[name = string("op_6496_interleave_0"), val = bool(false)]; + tensor var_6496 = concat(axis = var_6495, interleave = var_6496_interleave_0, values = (var_6493, var_6491_0))[name = string("op_6496")]; + tensor var_6497_cast_fp16 = mul(x = var_6496, y = sin_s)[name = string("op_6497_cast_fp16")]; + tensor input_275_cast_fp16 = add(x = var_6490_cast_fp16, y = var_6497_cast_fp16)[name = string("input_275_cast_fp16")]; + tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; + fp16 const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_17_cast_fp16 = pad(constant_val = const_111_to_fp16, mode = k_padded_17_mode_0, pad = k_padded_17_pad_0, x = input_275_cast_fp16)[name = string("k_padded_17_cast_fp16")]; + tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; + fp16 const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_17_cast_fp16 = pad(constant_val = const_112_to_fp16, mode = v_padded_17_mode_0, pad = v_padded_17_pad_0, x = input_277_cast_fp16)[name = string("v_padded_17_cast_fp16")]; + tensor slot_k_19_begin_0 = const()[name = 
string("slot_k_19_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor slot_k_19_end_0 = const()[name = string("slot_k_19_end_0"), val = tensor([9, 2, 512, 512])]; + tensor slot_k_19_end_mask_0 = const()[name = string("slot_k_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_k_19_cast_fp16 = slice_by_index(begin = slot_k_19_begin_0, end = slot_k_19_end_0, end_mask = slot_k_19_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("slot_k_19_cast_fp16")]; + tensor slot_v_19_begin_0 = const()[name = string("slot_v_19_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor slot_v_19_end_0 = const()[name = string("slot_v_19_end_0"), val = tensor([9, 2, 512, 512])]; + tensor slot_v_19_end_mask_0 = const()[name = string("slot_v_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor slot_v_19_cast_fp16 = slice_by_index(begin = slot_v_19_begin_0, end = slot_v_19_end_0, end_mask = slot_v_19_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("slot_v_19_cast_fp16")]; + tensor var_6536_begin_0 = const()[name = string("op_6536_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_6536_end_0 = const()[name = string("op_6536_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6536_end_mask_0 = const()[name = string("op_6536_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6536_cast_fp16 = slice_by_index(begin = var_6536_begin_0, end = var_6536_end_0, end_mask = var_6536_end_mask_0, x = slot_k_19_cast_fp16)[name = string("op_6536_cast_fp16")]; + int32 var_6543 = const()[name = string("op_6543"), val = int32(2)]; + bool new_k_19_interleave_0 = const()[name = string("new_k_19_interleave_0"), val = bool(false)]; + tensor new_k_19_cast_fp16 = concat(axis = var_6543, interleave = new_k_19_interleave_0, values = (var_6536_cast_fp16, k_padded_17_cast_fp16))[name = string("new_k_19_cast_fp16")]; + tensor var_6559_begin_0 = const()[name = string("op_6559_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_6559_end_0 = const()[name = 
string("op_6559_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6559_end_mask_0 = const()[name = string("op_6559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6559_cast_fp16 = slice_by_index(begin = var_6559_begin_0, end = var_6559_end_0, end_mask = var_6559_end_mask_0, x = slot_v_19_cast_fp16)[name = string("op_6559_cast_fp16")]; + int32 var_6566 = const()[name = string("op_6566"), val = int32(2)]; + bool new_v_19_interleave_0 = const()[name = string("new_v_19_interleave_0"), val = bool(false)]; + tensor new_v_19_cast_fp16 = concat(axis = var_6566, interleave = new_v_19_interleave_0, values = (var_6559_cast_fp16, v_padded_17_cast_fp16))[name = string("new_v_19_cast_fp16")]; + tensor var_6572_begin_0 = const()[name = string("op_6572_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6572_end_0 = const()[name = string("op_6572_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_6572_end_mask_0 = const()[name = string("op_6572_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6572_cast_fp16 = slice_by_index(begin = var_6572_begin_0, end = var_6572_end_0, end_mask = var_6572_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("op_6572_cast_fp16")]; + tensor var_6577_begin_0 = const()[name = string("op_6577_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6577_end_0 = const()[name = string("op_6577_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6577_end_mask_0 = const()[name = string("op_6577_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6577_cast_fp16 = slice_by_index(begin = var_6577_begin_0, end = var_6577_end_0, end_mask = var_6577_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = string("op_6577_cast_fp16")]; + int32 var_6579 = const()[name = string("op_6579"), val = int32(0)]; + bool K_sliding_out_17_interleave_0 = const()[name = string("K_sliding_out_17_interleave_0"), val = bool(false)]; + tensor K_sliding_out_17_cast_fp16 = concat(axis = var_6579, interleave = 
K_sliding_out_17_interleave_0, values = (var_6572_cast_fp16, new_k_19_cast_fp16, var_6577_cast_fp16))[name = string("K_sliding_out_17_cast_fp16")]; + tensor var_6585_begin_0 = const()[name = string("op_6585_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6585_end_0 = const()[name = string("op_6585_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_6585_end_mask_0 = const()[name = string("op_6585_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6585_cast_fp16 = slice_by_index(begin = var_6585_begin_0, end = var_6585_end_0, end_mask = var_6585_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("op_6585_cast_fp16")]; + tensor var_6590_begin_0 = const()[name = string("op_6590_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6590_end_0 = const()[name = string("op_6590_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6590_end_mask_0 = const()[name = string("op_6590_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6590_cast_fp16 = slice_by_index(begin = var_6590_begin_0, end = var_6590_end_0, end_mask = var_6590_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("op_6590_cast_fp16")]; + int32 var_6592 = const()[name = string("op_6592"), val = int32(0)]; + bool V_sliding_out_17_interleave_0 = const()[name = string("V_sliding_out_17_interleave_0"), val = bool(false)]; + tensor V_sliding_out_17_cast_fp16 = concat(axis = var_6592, interleave = V_sliding_out_17_interleave_0, values = (var_6585_cast_fp16, new_v_19_cast_fp16, var_6590_cast_fp16))[name = string("V_sliding_out_17_cast_fp16")]; + tensor var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6598_end_0 = const()[name = string("op_6598_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = var_6598_end_0, end_mask = 
var_6598_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("op_6598_cast_fp16")]; + tensor K_for_attn_19_begin_0 = const()[name = string("K_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_19_end_0 = const()[name = string("K_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_19_end_mask_0 = const()[name = string("K_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_19_cast_fp16 = slice_by_index(begin = K_for_attn_19_begin_0, end = K_for_attn_19_end_0, end_mask = K_for_attn_19_end_mask_0, x = var_6598_cast_fp16)[name = string("K_for_attn_19_cast_fp16")]; + tensor var_6608_begin_0 = const()[name = string("op_6608_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6608_end_0 = const()[name = string("op_6608_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_6608_end_mask_0 = const()[name = string("op_6608_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6608_cast_fp16 = slice_by_index(begin = var_6608_begin_0, end = var_6608_end_0, end_mask = var_6608_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("op_6608_cast_fp16")]; + tensor V_for_attn_19_begin_0 = const()[name = string("V_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_19_end_0 = const()[name = string("V_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_19_end_mask_0 = const()[name = string("V_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_19_cast_fp16 = slice_by_index(begin = V_for_attn_19_begin_0, end = V_for_attn_19_end_0, end_mask = V_for_attn_19_end_mask_0, x = var_6608_cast_fp16)[name = string("V_for_attn_19_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_18_reps_0 = const()[name = string("tile_18_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = 
K_for_attn_19_cast_fp16)[name = string("transpose_53")]; + tensor tile_18_cast_fp16 = tile(reps = tile_18_reps_0, x = transpose_36_cast_fp16)[name = string("tile_18_cast_fp16")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_38, x = tile_18_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_36_cast_fp16)[name = string("transpose_52")]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_39, x = transpose_37_cast_fp16)[name = string("reshape_37_cast_fp16")]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_19_reps_0 = const()[name = string("tile_19_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = V_for_attn_19_cast_fp16)[name = string("transpose_51")]; + tensor tile_19_cast_fp16 = tile(reps = tile_19_reps_0, x = transpose_38_cast_fp16)[name = string("tile_19_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_40, x = tile_19_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_39_cast_fp16 = transpose(perm = transpose_39_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_50")]; + tensor reshape_39_cast_fp16 = 
reshape(shape = concat_41, x = transpose_39_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor V_expanded_19_perm_0 = const()[name = string("V_expanded_19_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)]; + bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)]; + tensor transpose_77_cast_fp16 = transpose(perm = transpose_77_perm_0, x = reshape_37_cast_fp16)[name = string("transpose_49")]; + tensor attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_119_cast_fp16, y = transpose_77_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_sliding)[name = string("x_187_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_187_cast_fp16)[name = string("reduce_max_9")]; + tensor var_6643 = sub(x = x_187_cast_fp16, y = reduce_max_9)[name = string("op_6643")]; + tensor var_6649 = exp(x = var_6643)[name = string("op_6649")]; + tensor var_6659_axes_0 = const()[name = string("op_6659_axes_0"), val = tensor([-1])]; + bool var_6659_keep_dims_0 = const()[name = string("op_6659_keep_dims_0"), val = bool(true)]; + tensor var_6659 = reduce_sum(axes = var_6659_axes_0, keep_dims = var_6659_keep_dims_0, x = var_6649)[name = string("op_6659")]; + tensor var_6665_cast_fp16 = real_div(x = var_6649, y = var_6659)[name = string("op_6665_cast_fp16")]; + bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; + bool attn_output_55_transpose_y_0 = 
const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; + tensor V_expanded_19_cast_fp16 = transpose(perm = V_expanded_19_perm_0, x = reshape_39_cast_fp16)[name = string("transpose_48")]; + tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_6665_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_6676 = const()[name = string("op_6676"), val = tensor([0, 2, 1, 3])]; + tensor var_6683 = const()[name = string("op_6683"), val = tensor([1, 3, -1])]; + tensor var_6677_cast_fp16 = transpose(perm = var_6676, x = attn_output_55_cast_fp16)[name = string("transpose_47")]; + tensor attn_output_57_cast_fp16 = reshape(shape = var_6683, x = var_6677_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_6688 = const()[name = string("op_6688"), val = tensor([0, 2, 1])]; + string var_6704_pad_type_0 = const()[name = string("op_6704_pad_type_0"), val = string("valid")]; + int32 var_6704_groups_0 = const()[name = string("op_6704_groups_0"), val = int32(1)]; + tensor var_6704_strides_0 = const()[name = string("op_6704_strides_0"), val = tensor([1])]; + tensor var_6704_pad_0 = const()[name = string("op_6704_pad_0"), val = tensor([0, 0])]; + tensor var_6704_dilations_0 = const()[name = string("op_6704_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560655040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563276544))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6689_cast_fp16 = transpose(perm = var_6688, x = attn_output_57_cast_fp16)[name = string("transpose_46")]; + tensor var_6704_cast_fp16 = conv(dilations = var_6704_dilations_0, groups = var_6704_groups_0, pad = var_6704_pad_0, pad_type = 
var_6704_pad_type_0, strides = var_6704_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6689_cast_fp16)[name = string("op_6704_cast_fp16")]; + tensor var_6708 = const()[name = string("op_6708"), val = tensor([0, 2, 1])]; + int32 var_6714 = const()[name = string("op_6714"), val = int32(-1)]; + fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_191_cast_fp16 = transpose(perm = var_6708, x = var_6704_cast_fp16)[name = string("transpose_45")]; + tensor var_6716_cast_fp16 = mul(x = x_191_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_6716_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_6714, interleave = input_281_interleave_0, values = (x_191_cast_fp16, var_6716_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_6711_to_fp16 = const()[name = string("op_6711_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_6711_to_fp16, x = input_281_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor var_6721_split_sizes_0 = const()[name = string("op_6721_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6721_axis_0 = const()[name = string("op_6721_axis_0"), val = int32(-1)]; + tensor var_6721_cast_fp16_0, tensor var_6721_cast_fp16_1 = split(axis = var_6721_axis_0, split_sizes = var_6721_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_6721_cast_fp16")]; + tensor layers_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563279168)))]; + tensor attn_output_59_cast_fp16 = mul(x = 
var_6721_cast_fp16_0, y = layers_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor x_193_cast_fp16 = add(x = x_179_cast_fp16, y = attn_output_59_cast_fp16)[name = string("x_193_cast_fp16")]; + int32 var_6730 = const()[name = string("op_6730"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6732_cast_fp16 = mul(x = x_193_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_6732_cast_fp16")]; + bool input_283_interleave_0 = const()[name = string("input_283_interleave_0"), val = bool(false)]; + tensor input_283_cast_fp16 = concat(axis = var_6730, interleave = input_283_interleave_0, values = (x_193_cast_fp16, var_6732_cast_fp16))[name = string("input_283_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_6727_to_fp16, x = input_283_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor var_6737_split_sizes_0 = const()[name = string("op_6737_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6737_axis_0 = const()[name = string("op_6737_axis_0"), val = int32(-1)]; + tensor var_6737_cast_fp16_0, tensor var_6737_cast_fp16_1 = split(axis = var_6737_axis_0, split_sizes = var_6737_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_6737_cast_fp16")]; + tensor layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563284352)))]; + tensor h_57_cast_fp16 = mul(x = var_6737_cast_fp16_0, y = layers_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")]; + tensor var_6748 = 
const()[name = string("op_6748"), val = tensor([0, 2, 1])]; + tensor input_285_axes_0 = const()[name = string("input_285_axes_0"), val = tensor([2])]; + tensor var_6749 = transpose(perm = var_6748, x = h_57_cast_fp16)[name = string("transpose_44")]; + tensor input_285 = expand_dims(axes = input_285_axes_0, x = var_6749)[name = string("input_285")]; + string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; + tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; + tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; + int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; + tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_9_mlp_gate_proj_weight_palettized, x = input_285)[name = string("gate_37")]; + string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; + tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; + tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), val = tensor([1, 1])]; + int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; + tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_9_mlp_up_proj_weight_palettized, x = input_285)[name = string("up_19")]; + string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; + tensor input_287 = 
mul(x = gate_39, y = up_19)[name = string("input_287")]; + string mlp_out_19_pad_type_0 = const()[name = string("mlp_out_19_pad_type_0"), val = string("valid")]; + tensor mlp_out_19_strides_0 = const()[name = string("mlp_out_19_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_19_pad_0 = const()[name = string("mlp_out_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_19_dilations_0 = const()[name = string("mlp_out_19_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_19_groups_0 = const()[name = string("mlp_out_19_groups_0"), val = int32(1)]; + tensor mlp_out_19 = conv(dilations = mlp_out_19_dilations_0, groups = mlp_out_19_groups_0, pad = mlp_out_19_pad_0, pad_type = mlp_out_19_pad_type_0, strides = mlp_out_19_strides_0, weight = layers_9_mlp_down_proj_weight_palettized, x = input_287)[name = string("mlp_out_19")]; + tensor var_6789_axes_0 = const()[name = string("op_6789_axes_0"), val = tensor([2])]; + tensor var_6789 = squeeze(axes = var_6789_axes_0, x = mlp_out_19)[name = string("op_6789")]; + tensor var_6793 = const()[name = string("op_6793"), val = tensor([0, 2, 1])]; + int32 var_6799 = const()[name = string("op_6799"), val = int32(-1)]; + fp16 const_115_promoted = const()[name = string("const_115_promoted"), val = fp16(-0x1p+0)]; + tensor x_195 = transpose(perm = var_6793, x = var_6789)[name = string("transpose_43")]; + tensor var_6801 = mul(x = x_195, y = const_115_promoted)[name = string("op_6801")]; + bool input_289_interleave_0 = const()[name = string("input_289_interleave_0"), val = bool(false)]; + tensor input_289 = concat(axis = var_6799, interleave = input_289_interleave_0, values = (x_195, var_6801))[name = string("input_289")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_6796_to_fp16 = const()[name = string("op_6796_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_6796_to_fp16, x = input_289)[name = 
string("normed_273_cast_fp16")]; + tensor var_6806_split_sizes_0 = const()[name = string("op_6806_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6806_axis_0 = const()[name = string("op_6806_axis_0"), val = int32(-1)]; + tensor var_6806_0, tensor var_6806_1 = split(axis = var_6806_axis_0, split_sizes = var_6806_split_sizes_0, x = normed_273_cast_fp16)[name = string("op_6806")]; + tensor hidden_states_93 = mul(x = var_6806_0, y = layers_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")]; + tensor hidden_states_95_cast_fp16 = add(x = x_193_cast_fp16, y = hidden_states_93)[name = string("hidden_states_95_cast_fp16")]; + tensor per_layer_slice_19_begin_0 = const()[name = string("per_layer_slice_19_begin_0"), val = tensor([0, 0, 5376])]; + tensor per_layer_slice_19_end_0 = const()[name = string("per_layer_slice_19_end_0"), val = tensor([1, 3, 5632])]; + tensor per_layer_slice_19_end_mask_0 = const()[name = string("per_layer_slice_19_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_19_cast_fp16 = slice_by_index(begin = per_layer_slice_19_begin_0, end = per_layer_slice_19_end_0, end_mask = per_layer_slice_19_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_19_cast_fp16")]; + tensor var_6834 = const()[name = string("op_6834"), val = tensor([0, 2, 1])]; + tensor input_291_axes_0 = const()[name = string("input_291_axes_0"), val = tensor([2])]; + tensor var_6835 = transpose(perm = var_6834, x = hidden_states_95_cast_fp16)[name = string("transpose_42")]; + tensor input_291 = expand_dims(axes = input_291_axes_0, x = var_6835)[name = string("input_291")]; + string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")]; + tensor gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor([1, 1])]; + tensor gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_55_dilations_0 = const()[name = 
string("gated_55_dilations_0"), val = tensor([1, 1])]; + int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)]; + tensor gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_9_per_layer_input_gate_weight_palettized, x = input_291)[name = string("gated_55")]; + string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")]; + tensor var_6854 = const()[name = string("op_6854"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_19_axes_0 = const()[name = string("per_layer_slice_conv_19_axes_0"), val = tensor([2])]; + tensor var_6855_cast_fp16 = transpose(perm = var_6854, x = per_layer_slice_19_cast_fp16)[name = string("transpose_41")]; + tensor per_layer_slice_conv_19_cast_fp16 = expand_dims(axes = per_layer_slice_conv_19_axes_0, x = var_6855_cast_fp16)[name = string("per_layer_slice_conv_19_cast_fp16")]; + tensor input_293_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_19_cast_fp16)[name = string("input_293_cast_fp16")]; + string gated_59_pad_type_0 = const()[name = string("gated_59_pad_type_0"), val = string("valid")]; + tensor gated_59_strides_0 = const()[name = string("gated_59_strides_0"), val = tensor([1, 1])]; + tensor gated_59_pad_0 = const()[name = string("gated_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_59_dilations_0 = const()[name = string("gated_59_dilations_0"), val = tensor([1, 1])]; + int32 gated_59_groups_0 = const()[name = string("gated_59_groups_0"), val = int32(1)]; + tensor layers_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563289536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(563617280))))[name = string("layers_9_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_59_cast_fp16 = conv(dilations = gated_59_dilations_0, groups = gated_59_groups_0, pad = gated_59_pad_0, pad_type = gated_59_pad_type_0, strides = gated_59_strides_0, weight = layers_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_293_cast_fp16)[name = string("gated_59_cast_fp16")]; + tensor var_6871_axes_0 = const()[name = string("op_6871_axes_0"), val = tensor([2])]; + tensor var_6871_cast_fp16 = squeeze(axes = var_6871_axes_0, x = gated_59_cast_fp16)[name = string("op_6871_cast_fp16")]; + tensor var_6875 = const()[name = string("op_6875"), val = tensor([0, 2, 1])]; + int32 var_6881 = const()[name = string("op_6881"), val = int32(-1)]; + fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_197_cast_fp16 = transpose(perm = var_6875, x = var_6871_cast_fp16)[name = string("transpose_40")]; + tensor var_6883_cast_fp16 = mul(x = x_197_cast_fp16, y = const_116_promoted_to_fp16)[name = string("op_6883_cast_fp16")]; + bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)]; + tensor input_295_cast_fp16 = concat(axis = var_6881, interleave = input_295_interleave_0, values = (x_197_cast_fp16, var_6883_cast_fp16))[name = string("input_295_cast_fp16")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_6878_to_fp16 = const()[name = string("op_6878_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_6878_to_fp16, x = input_295_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor var_6888_split_sizes_0 = const()[name = string("op_6888_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6888_axis_0 = const()[name = string("op_6888_axis_0"), val = int32(-1)]; + tensor var_6888_cast_fp16_0, 
tensor var_6888_cast_fp16_1 = split(axis = var_6888_axis_0, split_sizes = var_6888_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_6888_cast_fp16")]; + tensor layers_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563619904)))]; + tensor hidden_states_99_cast_fp16 = mul(x = var_6888_cast_fp16_0, y = layers_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = tensor([0x1.d8p-2])]; + tensor x_199_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_117_promoted_to_fp16)[name = string("x_199_cast_fp16")]; + int32 var_6903 = const()[name = string("op_6903"), val = int32(-1)]; + fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6905_cast_fp16 = mul(x = x_199_cast_fp16, y = const_118_promoted_to_fp16)[name = string("op_6905_cast_fp16")]; + bool input_297_interleave_0 = const()[name = string("input_297_interleave_0"), val = bool(false)]; + tensor input_297_cast_fp16 = concat(axis = var_6903, interleave = input_297_interleave_0, values = (x_199_cast_fp16, var_6905_cast_fp16))[name = string("input_297_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_6900_to_fp16 = const()[name = string("op_6900_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_6900_to_fp16, x = input_297_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor var_6910_split_sizes_0 = const()[name = 
string("op_6910_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6910_axis_0 = const()[name = string("op_6910_axis_0"), val = int32(-1)]; + tensor var_6910_cast_fp16_0, tensor var_6910_cast_fp16_1 = split(axis = var_6910_axis_0, split_sizes = var_6910_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_6910_cast_fp16")]; + tensor layers_10_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563625088)))]; + tensor h_61_cast_fp16 = mul(x = var_6910_cast_fp16_0, y = layers_10_input_layernorm_weight_promoted_to_fp16)[name = string("h_61_cast_fp16")]; + tensor var_6916 = const()[name = string("op_6916"), val = tensor([0, 2, 1])]; + tensor var_6919_axes_0 = const()[name = string("op_6919_axes_0"), val = tensor([2])]; + tensor var_6917_cast_fp16 = transpose(perm = var_6916, x = h_61_cast_fp16)[name = string("transpose_39")]; + tensor var_6919_cast_fp16 = expand_dims(axes = var_6919_axes_0, x = var_6917_cast_fp16)[name = string("op_6919_cast_fp16")]; + string q_121_pad_type_0 = const()[name = string("q_121_pad_type_0"), val = string("valid")]; + tensor q_121_strides_0 = const()[name = string("q_121_strides_0"), val = tensor([1, 1])]; + tensor q_121_pad_0 = const()[name = string("q_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_121_dilations_0 = const()[name = string("q_121_dilations_0"), val = tensor([1, 1])]; + int32 q_121_groups_0 = const()[name = string("q_121_groups_0"), val = int32(1)]; + tensor q_121 = conv(dilations = q_121_dilations_0, groups = q_121_groups_0, pad = q_121_pad_0, pad_type = q_121_pad_type_0, strides = q_121_strides_0, weight = layers_10_self_attn_q_proj_weight_palettized, x = var_6919_cast_fp16)[name = string("q_121")]; + tensor var_6940 = const()[name = string("op_6940"), val = tensor([1, 8, 256, 3])]; + tensor var_6941 = reshape(shape = var_6940, x = q_121)[name = 
string("op_6941")]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_6964 = const()[name = string("op_6964"), val = tensor([3, 8, 256])]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = var_6941)[name = string("transpose_38")]; + tensor x_201 = reshape(shape = var_6964, x = transpose_78)[name = string("x_201")]; + int32 var_6970 = const()[name = string("op_6970"), val = int32(-1)]; + fp16 const_119_promoted = const()[name = string("const_119_promoted"), val = fp16(-0x1p+0)]; + tensor var_6972 = mul(x = x_201, y = const_119_promoted)[name = string("op_6972")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301 = concat(axis = var_6970, interleave = input_301_interleave_0, values = (x_201, var_6972))[name = string("input_301")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_6967_to_fp16 = const()[name = string("op_6967_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_6967_to_fp16, x = input_301)[name = string("normed_285_cast_fp16")]; + tensor var_6977_split_sizes_0 = const()[name = string("op_6977_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6977_axis_0 = const()[name = string("op_6977_axis_0"), val = int32(-1)]; + tensor var_6977_0, tensor var_6977_1 = split(axis = var_6977_axis_0, split_sizes = var_6977_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_6977")]; + tensor q_125 = mul(x = var_6977_0, y = layers_10_self_attn_q_norm_weight)[name = string("q_125")]; + tensor var_6984 = const()[name = string("op_6984"), val = tensor([1, 3, 8, 256])]; + tensor var_6985 = reshape(shape = var_6984, x = q_125)[name = string("op_6985")]; + tensor var_6990 = const()[name = string("op_6990"), val = tensor([0, 2, 1, 3])]; + tensor q_127 = transpose(perm = var_6990, x = 
var_6985)[name = string("transpose_37")]; + tensor var_6992_cast_fp16 = mul(x = q_127, y = cos_s)[name = string("op_6992_cast_fp16")]; + tensor var_6993_split_sizes_0 = const()[name = string("op_6993_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6993_axis_0 = const()[name = string("op_6993_axis_0"), val = int32(-1)]; + tensor var_6993_0, tensor var_6993_1 = split(axis = var_6993_axis_0, split_sizes = var_6993_split_sizes_0, x = q_127)[name = string("op_6993")]; + fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; + tensor var_6995 = mul(x = var_6993_1, y = const_120_promoted)[name = string("op_6995")]; + int32 var_6997 = const()[name = string("op_6997"), val = int32(-1)]; + bool var_6998_interleave_0 = const()[name = string("op_6998_interleave_0"), val = bool(false)]; + tensor var_6998 = concat(axis = var_6997, interleave = var_6998_interleave_0, values = (var_6995, var_6993_0))[name = string("op_6998")]; + tensor var_6999_cast_fp16 = mul(x = var_6998, y = sin_s)[name = string("op_6999_cast_fp16")]; + tensor q_131_cast_fp16 = add(x = var_6992_cast_fp16, y = var_6999_cast_fp16)[name = string("q_131_cast_fp16")]; + string k_63_pad_type_0 = const()[name = string("k_63_pad_type_0"), val = string("valid")]; + tensor k_63_strides_0 = const()[name = string("k_63_strides_0"), val = tensor([1, 1])]; + tensor k_63_pad_0 = const()[name = string("k_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_63_dilations_0 = const()[name = string("k_63_dilations_0"), val = tensor([1, 1])]; + int32 k_63_groups_0 = const()[name = string("k_63_groups_0"), val = int32(1)]; + tensor k_63 = conv(dilations = k_63_dilations_0, groups = k_63_groups_0, pad = k_63_pad_0, pad_type = k_63_pad_type_0, strides = k_63_strides_0, weight = layers_10_self_attn_k_proj_weight_palettized, x = var_6919_cast_fp16)[name = string("k_63")]; + tensor var_7017 = const()[name = string("op_7017"), val = tensor([1, 2, 256, 3])]; + tensor var_7018 = reshape(shape = 
var_7017, x = k_63)[name = string("op_7018")]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_23_pad_type_0 = const()[name = string("v_23_pad_type_0"), val = string("valid")]; + tensor v_23_strides_0 = const()[name = string("v_23_strides_0"), val = tensor([1, 1])]; + tensor v_23_pad_0 = const()[name = string("v_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_23_dilations_0 = const()[name = string("v_23_dilations_0"), val = tensor([1, 1])]; + int32 v_23_groups_0 = const()[name = string("v_23_groups_0"), val = int32(1)]; + tensor v_23 = conv(dilations = v_23_dilations_0, groups = v_23_groups_0, pad = v_23_pad_0, pad_type = v_23_pad_type_0, strides = v_23_strides_0, weight = layers_10_self_attn_v_proj_weight_palettized, x = var_6919_cast_fp16)[name = string("v_23")]; + tensor var_7045 = const()[name = string("op_7045"), val = tensor([1, 2, 256, 3])]; + tensor var_7046 = reshape(shape = var_7045, x = v_23)[name = string("op_7046")]; + tensor var_7051 = const()[name = string("op_7051"), val = tensor([0, 1, 3, 2])]; + tensor var_7069 = const()[name = string("op_7069"), val = tensor([3, 2, 256])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = var_7018)[name = string("transpose_36")]; + tensor x_203 = reshape(shape = var_7069, x = transpose_79)[name = string("x_203")]; + int32 var_7075 = const()[name = string("op_7075"), val = int32(-1)]; + fp16 const_121_promoted = const()[name = string("const_121_promoted"), val = fp16(-0x1p+0)]; + tensor var_7077 = mul(x = x_203, y = const_121_promoted)[name = string("op_7077")]; + bool input_303_interleave_0 = const()[name = string("input_303_interleave_0"), val = bool(false)]; + tensor input_303 = concat(axis = var_7075, interleave = input_303_interleave_0, values = (x_203, var_7077))[name = string("input_303")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_7072_to_fp16 = 
const()[name = string("op_7072_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_7072_to_fp16, x = input_303)[name = string("normed_289_cast_fp16")]; + tensor var_7082_split_sizes_0 = const()[name = string("op_7082_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7082_axis_0 = const()[name = string("op_7082_axis_0"), val = int32(-1)]; + tensor var_7082_0, tensor var_7082_1 = split(axis = var_7082_axis_0, split_sizes = var_7082_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_7082")]; + tensor k_67 = mul(x = var_7082_0, y = layers_4_self_attn_k_norm_weight)[name = string("k_67")]; + tensor var_7089 = const()[name = string("op_7089"), val = tensor([1, 3, 2, 256])]; + tensor var_7090 = reshape(shape = var_7089, x = k_67)[name = string("op_7090")]; + tensor var_7095 = const()[name = string("op_7095"), val = tensor([0, 2, 1, 3])]; + fp16 var_7097_promoted = const()[name = string("op_7097_promoted"), val = fp16(0x1p+1)]; + tensor var_7052 = transpose(perm = var_7051, x = var_7046)[name = string("transpose_35")]; + tensor var_7098 = pow(x = var_7052, y = var_7097_promoted)[name = string("op_7098")]; + tensor var_7103_axes_0 = const()[name = string("op_7103_axes_0"), val = tensor([-1])]; + bool var_7103_keep_dims_0 = const()[name = string("op_7103_keep_dims_0"), val = bool(true)]; + tensor var_7103 = reduce_mean(axes = var_7103_axes_0, keep_dims = var_7103_keep_dims_0, x = var_7098)[name = string("op_7103")]; + fp16 var_7105_to_fp16 = const()[name = string("op_7105_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_21_cast_fp16 = add(x = var_7103, y = var_7105_to_fp16)[name = string("mean_sq_21_cast_fp16")]; + fp32 var_7107_epsilon_0 = const()[name = string("op_7107_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7107_cast_fp16 = rsqrt(epsilon = var_7107_epsilon_0, x = mean_sq_21_cast_fp16)[name = string("op_7107_cast_fp16")]; + tensor input_307_cast_fp16 = mul(x = var_7052, y = 
var_7107_cast_fp16)[name = string("input_307_cast_fp16")]; + tensor q_129 = transpose(perm = var_7095, x = var_7090)[name = string("transpose_34")]; + tensor var_7109_cast_fp16 = mul(x = q_129, y = cos_s)[name = string("op_7109_cast_fp16")]; + tensor var_7110_split_sizes_0 = const()[name = string("op_7110_split_sizes_0"), val = tensor([128, 128])]; + int32 var_7110_axis_0 = const()[name = string("op_7110_axis_0"), val = int32(-1)]; + tensor var_7110_0, tensor var_7110_1 = split(axis = var_7110_axis_0, split_sizes = var_7110_split_sizes_0, x = q_129)[name = string("op_7110")]; + fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_7112 = mul(x = var_7110_1, y = const_122_promoted)[name = string("op_7112")]; + int32 var_7114 = const()[name = string("op_7114"), val = int32(-1)]; + bool var_7115_interleave_0 = const()[name = string("op_7115_interleave_0"), val = bool(false)]; + tensor var_7115 = concat(axis = var_7114, interleave = var_7115_interleave_0, values = (var_7112, var_7110_0))[name = string("op_7115")]; + tensor var_7116_cast_fp16 = mul(x = var_7115, y = sin_s)[name = string("op_7116_cast_fp16")]; + tensor input_305_cast_fp16 = add(x = var_7109_cast_fp16, y = var_7116_cast_fp16)[name = string("input_305_cast_fp16")]; + tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), val = string("constant")]; + fp16 const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_cast_fp16 = pad(constant_val = const_123_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_305_cast_fp16)[name = string("k_padded_cast_fp16")]; + tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; + fp16 
const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_cast_fp16 = pad(constant_val = const_124_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = input_307_cast_fp16)[name = string("v_padded_cast_fp16")]; + tensor slot_k_21_begin_0 = const()[name = string("slot_k_21_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor slot_k_21_end_0 = const()[name = string("slot_k_21_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_k_21_end_mask_0 = const()[name = string("slot_k_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_k_21_cast_fp16 = slice_by_index(begin = slot_k_21_begin_0, end = slot_k_21_end_0, end_mask = slot_k_21_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("slot_k_21_cast_fp16")]; + tensor slot_v_21_begin_0 = const()[name = string("slot_v_21_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor slot_v_21_end_0 = const()[name = string("slot_v_21_end_0"), val = tensor([1, 2, 512, 512])]; + tensor slot_v_21_end_mask_0 = const()[name = string("slot_v_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_v_21_cast_fp16 = slice_by_index(begin = slot_v_21_begin_0, end = slot_v_21_end_0, end_mask = slot_v_21_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("slot_v_21_cast_fp16")]; + tensor var_7155_begin_0 = const()[name = string("op_7155_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_7155_end_0 = const()[name = string("op_7155_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_7155_end_mask_0 = const()[name = string("op_7155_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7155_cast_fp16 = slice_by_index(begin = var_7155_begin_0, end = var_7155_end_0, end_mask = var_7155_end_mask_0, x = slot_k_21_cast_fp16)[name = string("op_7155_cast_fp16")]; + int32 var_7162 = const()[name = string("op_7162"), val = int32(2)]; + bool new_k_21_interleave_0 = const()[name = string("new_k_21_interleave_0"), val = bool(false)]; + tensor 
new_k_21_cast_fp16 = concat(axis = var_7162, interleave = new_k_21_interleave_0, values = (var_7155_cast_fp16, k_padded_cast_fp16))[name = string("new_k_21_cast_fp16")]; + tensor var_7178_begin_0 = const()[name = string("op_7178_begin_0"), val = tensor([0, 0, 3, 0])]; + tensor var_7178_end_0 = const()[name = string("op_7178_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_7178_end_mask_0 = const()[name = string("op_7178_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7178_cast_fp16 = slice_by_index(begin = var_7178_begin_0, end = var_7178_end_0, end_mask = var_7178_end_mask_0, x = slot_v_21_cast_fp16)[name = string("op_7178_cast_fp16")]; + int32 var_7185 = const()[name = string("op_7185"), val = int32(2)]; + bool new_v_21_interleave_0 = const()[name = string("new_v_21_interleave_0"), val = bool(false)]; + tensor new_v_21_cast_fp16 = concat(axis = var_7185, interleave = new_v_21_interleave_0, values = (var_7178_cast_fp16, v_padded_cast_fp16))[name = string("new_v_21_cast_fp16")]; + tensor var_7191_begin_0 = const()[name = string("op_7191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7191_end_0 = const()[name = string("op_7191_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_7191_end_mask_0 = const()[name = string("op_7191_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7191_cast_fp16 = slice_by_index(begin = var_7191_begin_0, end = var_7191_end_0, end_mask = var_7191_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("op_7191_cast_fp16")]; + int32 var_7198 = const()[name = string("op_7198"), val = int32(0)]; + bool K_sliding_out_interleave_0 = const()[name = string("K_sliding_out_interleave_0"), val = bool(false)]; + tensor K_sliding_out = concat(axis = var_7198, interleave = K_sliding_out_interleave_0, values = (var_7191_cast_fp16, new_k_21_cast_fp16))[name = string("K_sliding_out_cast_fp16")]; + tensor var_7204_begin_0 = const()[name = string("op_7204_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_7204_end_0 = const()[name = string("op_7204_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_7204_end_mask_0 = const()[name = string("op_7204_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7204_cast_fp16 = slice_by_index(begin = var_7204_begin_0, end = var_7204_end_0, end_mask = var_7204_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("op_7204_cast_fp16")]; + int32 var_7211 = const()[name = string("op_7211"), val = int32(0)]; + bool V_sliding_out_interleave_0 = const()[name = string("V_sliding_out_interleave_0"), val = bool(false)]; + tensor V_sliding_out = concat(axis = var_7211, interleave = V_sliding_out_interleave_0, values = (var_7204_cast_fp16, new_v_21_cast_fp16))[name = string("V_sliding_out_cast_fp16")]; + tensor var_7217_begin_0 = const()[name = string("op_7217_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_7217_end_0 = const()[name = string("op_7217_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_7217_end_mask_0 = const()[name = string("op_7217_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7217_cast_fp16 = slice_by_index(begin = var_7217_begin_0, end = var_7217_end_0, end_mask = var_7217_end_mask_0, x = K_sliding_out)[name = string("op_7217_cast_fp16")]; + tensor K_for_attn_21_begin_0 = const()[name = string("K_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_21_end_0 = const()[name = string("K_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_21_end_mask_0 = const()[name = string("K_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor kv13_k = slice_by_index(begin = K_for_attn_21_begin_0, end = K_for_attn_21_end_0, end_mask = K_for_attn_21_end_mask_0, x = var_7217_cast_fp16)[name = string("K_for_attn_21_cast_fp16")]; + tensor var_7227_begin_0 = const()[name = string("op_7227_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_7227_end_0 = const()[name = string("op_7227_end_0"), val = tensor([1, 2, 
512, 512])]; + tensor var_7227_end_mask_0 = const()[name = string("op_7227_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_7227_cast_fp16 = slice_by_index(begin = var_7227_begin_0, end = var_7227_end_0, end_mask = var_7227_end_mask_0, x = V_sliding_out)[name = string("op_7227_cast_fp16")]; + tensor V_for_attn_21_begin_0 = const()[name = string("V_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_21_end_0 = const()[name = string("V_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_21_end_mask_0 = const()[name = string("V_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor kv13_v = slice_by_index(begin = V_for_attn_21_begin_0, end = V_for_attn_21_end_0, end_mask = V_for_attn_21_end_mask_0, x = var_7227_cast_fp16)[name = string("V_for_attn_21_cast_fp16")]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_20_reps_0 = const()[name = string("tile_20_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_40_cast_fp16 = transpose(perm = transpose_40_perm_0, x = kv13_k)[name = string("transpose_33")]; + tensor tile_20_cast_fp16 = tile(reps = tile_20_reps_0, x = transpose_40_cast_fp16)[name = string("tile_20_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_42, x = tile_20_cast_fp16)[name = string("reshape_40_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_40_cast_fp16)[name = string("transpose_32")]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_43, x = transpose_41_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor transpose_80_perm_0 = 
const()[name = string("transpose_80_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_21_reps_0 = const()[name = string("tile_21_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_42_cast_fp16 = transpose(perm = transpose_42_perm_0, x = kv13_v)[name = string("transpose_31")]; + tensor tile_21_cast_fp16 = tile(reps = tile_21_reps_0, x = transpose_42_cast_fp16)[name = string("tile_21_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_44, x = tile_21_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_43_cast_fp16 = transpose(perm = transpose_43_perm_0, x = reshape_42_cast_fp16)[name = string("transpose_30")]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_45, x = transpose_43_cast_fp16)[name = string("reshape_43_cast_fp16")]; + tensor V_expanded_21_perm_0 = const()[name = string("V_expanded_21_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor transpose_80_cast_fp16 = transpose(perm = transpose_80_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_29")]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = q_131_cast_fp16, y = transpose_80_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask_sliding)[name = 
string("x_207_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_207_cast_fp16)[name = string("reduce_max_10")]; + tensor var_7282 = sub(x = x_207_cast_fp16, y = reduce_max_10)[name = string("op_7282")]; + tensor var_7288 = exp(x = var_7282)[name = string("op_7288")]; + tensor var_7298_axes_0 = const()[name = string("op_7298_axes_0"), val = tensor([-1])]; + bool var_7298_keep_dims_0 = const()[name = string("op_7298_keep_dims_0"), val = bool(true)]; + tensor var_7298 = reduce_sum(axes = var_7298_axes_0, keep_dims = var_7298_keep_dims_0, x = var_7288)[name = string("op_7298")]; + tensor var_7304_cast_fp16 = real_div(x = var_7288, y = var_7298)[name = string("op_7304_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor V_expanded_21_cast_fp16 = transpose(perm = V_expanded_21_perm_0, x = reshape_43_cast_fp16)[name = string("transpose_28")]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_7304_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_7315 = const()[name = string("op_7315"), val = tensor([0, 2, 1, 3])]; + tensor var_7322 = const()[name = string("op_7322"), val = tensor([1, 3, -1])]; + tensor var_7316_cast_fp16 = transpose(perm = var_7315, x = attn_output_61_cast_fp16)[name = string("transpose_27")]; + tensor attn_output_63_cast_fp16 = reshape(shape = var_7322, x = var_7316_cast_fp16)[name = string("attn_output_63_cast_fp16")]; + tensor var_7327 = const()[name = 
string("op_7327"), val = tensor([0, 2, 1])]; + string var_7343_pad_type_0 = const()[name = string("op_7343_pad_type_0"), val = string("valid")]; + int32 var_7343_groups_0 = const()[name = string("op_7343_groups_0"), val = int32(1)]; + tensor var_7343_strides_0 = const()[name = string("op_7343_strides_0"), val = tensor([1])]; + tensor var_7343_pad_0 = const()[name = string("op_7343_pad_0"), val = tensor([0, 0])]; + tensor var_7343_dilations_0 = const()[name = string("op_7343_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(563630272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566251776))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7328_cast_fp16 = transpose(perm = var_7327, x = attn_output_63_cast_fp16)[name = string("transpose_26")]; + tensor var_7343_cast_fp16 = conv(dilations = var_7343_dilations_0, groups = var_7343_groups_0, pad = var_7343_pad_0, pad_type = var_7343_pad_type_0, strides = var_7343_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_7328_cast_fp16)[name = string("op_7343_cast_fp16")]; + tensor var_7347 = const()[name = string("op_7347"), val = tensor([0, 2, 1])]; + int32 var_7353 = const()[name = string("op_7353"), val = int32(-1)]; + fp16 const_125_promoted_to_fp16 = const()[name = string("const_125_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_211_cast_fp16 = transpose(perm = var_7347, x = var_7343_cast_fp16)[name = string("transpose_25")]; + tensor var_7355_cast_fp16 = mul(x = x_211_cast_fp16, y = const_125_promoted_to_fp16)[name = string("op_7355_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_7353, interleave = input_311_interleave_0, values = 
(x_211_cast_fp16, var_7355_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_7350_to_fp16 = const()[name = string("op_7350_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_7350_to_fp16, x = input_311_cast_fp16)[name = string("normed_293_cast_fp16")]; + tensor var_7360_split_sizes_0 = const()[name = string("op_7360_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7360_axis_0 = const()[name = string("op_7360_axis_0"), val = int32(-1)]; + tensor var_7360_cast_fp16_0, tensor var_7360_cast_fp16_1 = split(axis = var_7360_axis_0, split_sizes = var_7360_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_7360_cast_fp16")]; + tensor layers_10_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566254400)))]; + tensor attn_output_65_cast_fp16 = mul(x = var_7360_cast_fp16_0, y = layers_10_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_199_cast_fp16, y = attn_output_65_cast_fp16)[name = string("x_213_cast_fp16")]; + int32 var_7369 = const()[name = string("op_7369"), val = int32(-1)]; + fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7371_cast_fp16 = mul(x = x_213_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_7371_cast_fp16")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_7369, interleave = input_313_interleave_0, values = (x_213_cast_fp16, var_7371_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_297_axes_0 = const()[name = 
string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_7366_to_fp16 = const()[name = string("op_7366_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_7366_to_fp16, x = input_313_cast_fp16)[name = string("normed_297_cast_fp16")]; + tensor var_7376_split_sizes_0 = const()[name = string("op_7376_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7376_axis_0 = const()[name = string("op_7376_axis_0"), val = int32(-1)]; + tensor var_7376_cast_fp16_0, tensor var_7376_cast_fp16_1 = split(axis = var_7376_axis_0, split_sizes = var_7376_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_7376_cast_fp16")]; + tensor layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566259584)))]; + tensor h_63_cast_fp16 = mul(x = var_7376_cast_fp16_0, y = layers_10_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_63_cast_fp16")]; + tensor var_7387 = const()[name = string("op_7387"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_7388 = transpose(perm = var_7387, x = h_63_cast_fp16)[name = string("transpose_24")]; + tensor input_315 = expand_dims(axes = input_315_axes_0, x = var_7388)[name = string("input_315")]; + string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; + tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; + tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; + int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; + tensor gate_41 = conv(dilations = 
gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_10_mlp_gate_proj_weight_palettized, x = input_315)[name = string("gate_41")]; + string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; + tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; + tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; + int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; + tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_10_mlp_up_proj_weight_palettized, x = input_315)[name = string("up_21")]; + string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; + tensor input_317 = mul(x = gate_43, y = up_21)[name = string("input_317")]; + string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; + tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; + tensor mlp_out_21 = conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_10_mlp_down_proj_weight_palettized, x = input_317)[name = string("mlp_out_21")]; + tensor 
var_7428_axes_0 = const()[name = string("op_7428_axes_0"), val = tensor([2])]; + tensor var_7428 = squeeze(axes = var_7428_axes_0, x = mlp_out_21)[name = string("op_7428")]; + tensor var_7432 = const()[name = string("op_7432"), val = tensor([0, 2, 1])]; + int32 var_7438 = const()[name = string("op_7438"), val = int32(-1)]; + fp16 const_127_promoted = const()[name = string("const_127_promoted"), val = fp16(-0x1p+0)]; + tensor x_215 = transpose(perm = var_7432, x = var_7428)[name = string("transpose_23")]; + tensor var_7440 = mul(x = x_215, y = const_127_promoted)[name = string("op_7440")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319 = concat(axis = var_7438, interleave = input_319_interleave_0, values = (x_215, var_7440))[name = string("input_319")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_7435_to_fp16 = const()[name = string("op_7435_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_7435_to_fp16, x = input_319)[name = string("normed_301_cast_fp16")]; + tensor var_7445_split_sizes_0 = const()[name = string("op_7445_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7445_axis_0 = const()[name = string("op_7445_axis_0"), val = int32(-1)]; + tensor var_7445_0, tensor var_7445_1 = split(axis = var_7445_axis_0, split_sizes = var_7445_split_sizes_0, x = normed_301_cast_fp16)[name = string("op_7445")]; + tensor hidden_states_103 = mul(x = var_7445_0, y = layers_10_post_feedforward_layernorm_weight)[name = string("hidden_states_103")]; + tensor hidden_states_105_cast_fp16 = add(x = x_213_cast_fp16, y = hidden_states_103)[name = string("hidden_states_105_cast_fp16")]; + tensor per_layer_slice_21_begin_0 = const()[name = string("per_layer_slice_21_begin_0"), val = tensor([0, 0, 5632])]; + tensor per_layer_slice_21_end_0 = const()[name = 
string("per_layer_slice_21_end_0"), val = tensor([1, 3, 5888])]; + tensor per_layer_slice_21_end_mask_0 = const()[name = string("per_layer_slice_21_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_21_cast_fp16 = slice_by_index(begin = per_layer_slice_21_begin_0, end = per_layer_slice_21_end_0, end_mask = per_layer_slice_21_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_21_cast_fp16")]; + tensor var_7473 = const()[name = string("op_7473"), val = tensor([0, 2, 1])]; + tensor input_321_axes_0 = const()[name = string("input_321_axes_0"), val = tensor([2])]; + tensor var_7474 = transpose(perm = var_7473, x = hidden_states_105_cast_fp16)[name = string("transpose_22")]; + tensor input_321 = expand_dims(axes = input_321_axes_0, x = var_7474)[name = string("input_321")]; + string gated_61_pad_type_0 = const()[name = string("gated_61_pad_type_0"), val = string("valid")]; + tensor gated_61_strides_0 = const()[name = string("gated_61_strides_0"), val = tensor([1, 1])]; + tensor gated_61_pad_0 = const()[name = string("gated_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_61_dilations_0 = const()[name = string("gated_61_dilations_0"), val = tensor([1, 1])]; + int32 gated_61_groups_0 = const()[name = string("gated_61_groups_0"), val = int32(1)]; + tensor gated_61 = conv(dilations = gated_61_dilations_0, groups = gated_61_groups_0, pad = gated_61_pad_0, pad_type = gated_61_pad_type_0, strides = gated_61_strides_0, weight = layers_10_per_layer_input_gate_weight_palettized, x = input_321)[name = string("gated_61")]; + string gated_63_mode_0 = const()[name = string("gated_63_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_63 = gelu(mode = gated_63_mode_0, x = gated_61)[name = string("gated_63")]; + tensor var_7493 = const()[name = string("op_7493"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_21_axes_0 = const()[name = string("per_layer_slice_conv_21_axes_0"), val = tensor([2])]; + tensor 
var_7494_cast_fp16 = transpose(perm = var_7493, x = per_layer_slice_21_cast_fp16)[name = string("transpose_21")]; + tensor per_layer_slice_conv_21_cast_fp16 = expand_dims(axes = per_layer_slice_conv_21_axes_0, x = var_7494_cast_fp16)[name = string("per_layer_slice_conv_21_cast_fp16")]; + tensor input_323_cast_fp16 = mul(x = gated_63, y = per_layer_slice_conv_21_cast_fp16)[name = string("input_323_cast_fp16")]; + string gated_65_pad_type_0 = const()[name = string("gated_65_pad_type_0"), val = string("valid")]; + tensor gated_65_strides_0 = const()[name = string("gated_65_strides_0"), val = tensor([1, 1])]; + tensor gated_65_pad_0 = const()[name = string("gated_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_65_dilations_0 = const()[name = string("gated_65_dilations_0"), val = tensor([1, 1])]; + int32 gated_65_groups_0 = const()[name = string("gated_65_groups_0"), val = int32(1)]; + tensor layers_10_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566264768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566592512))))[name = string("layers_10_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_65_cast_fp16 = conv(dilations = gated_65_dilations_0, groups = gated_65_groups_0, pad = gated_65_pad_0, pad_type = gated_65_pad_type_0, strides = gated_65_strides_0, weight = layers_10_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_323_cast_fp16)[name = string("gated_65_cast_fp16")]; + tensor var_7510_axes_0 = const()[name = string("op_7510_axes_0"), val = tensor([2])]; + tensor var_7510_cast_fp16 = squeeze(axes = var_7510_axes_0, x = gated_65_cast_fp16)[name = string("op_7510_cast_fp16")]; + tensor var_7514 = const()[name = string("op_7514"), val = tensor([0, 2, 1])]; + int32 var_7520 = const()[name = string("op_7520"), val = int32(-1)]; + fp16 
const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_217_cast_fp16 = transpose(perm = var_7514, x = var_7510_cast_fp16)[name = string("transpose_20")]; + tensor var_7522_cast_fp16 = mul(x = x_217_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_7522_cast_fp16")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325_cast_fp16 = concat(axis = var_7520, interleave = input_325_interleave_0, values = (x_217_cast_fp16, var_7522_cast_fp16))[name = string("input_325_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_7517_to_fp16 = const()[name = string("op_7517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_7517_to_fp16, x = input_325_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor var_7527_split_sizes_0 = const()[name = string("op_7527_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7527_axis_0 = const()[name = string("op_7527_axis_0"), val = int32(-1)]; + tensor var_7527_cast_fp16_0, tensor var_7527_cast_fp16_1 = split(axis = var_7527_axis_0, split_sizes = var_7527_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_7527_cast_fp16")]; + tensor layers_10_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_10_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566595136)))]; + tensor hidden_states_109_cast_fp16 = mul(x = var_7527_cast_fp16_0, y = layers_10_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_109_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = hidden_states_109_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor const_129_promoted_to_fp16 = 
const()[name = string("const_129_promoted_to_fp16"), val = tensor([0x1.42p-3])]; + tensor x_219_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_129_promoted_to_fp16)[name = string("x_219_cast_fp16")]; + int32 var_7542 = const()[name = string("op_7542"), val = int32(-1)]; + fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7544_cast_fp16 = mul(x = x_219_cast_fp16, y = const_130_promoted_to_fp16)[name = string("op_7544_cast_fp16")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327_cast_fp16 = concat(axis = var_7542, interleave = input_327_interleave_0, values = (x_219_cast_fp16, var_7544_cast_fp16))[name = string("input_327_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_7539_to_fp16 = const()[name = string("op_7539_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_7539_to_fp16, x = input_327_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor var_7549_split_sizes_0 = const()[name = string("op_7549_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7549_axis_0 = const()[name = string("op_7549_axis_0"), val = int32(-1)]; + tensor var_7549_cast_fp16_0, tensor var_7549_cast_fp16_1 = split(axis = var_7549_axis_0, split_sizes = var_7549_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_7549_cast_fp16")]; + tensor layers_11_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566600320)))]; + tensor h_67_cast_fp16 = mul(x = var_7549_cast_fp16_0, y = layers_11_input_layernorm_weight_promoted_to_fp16)[name = string("h_67_cast_fp16")]; + tensor var_7555 = const()[name = string("op_7555"), val = tensor([0, 2, 1])]; + 
tensor var_7558_axes_0 = const()[name = string("op_7558_axes_0"), val = tensor([2])]; + tensor var_7556_cast_fp16 = transpose(perm = var_7555, x = h_67_cast_fp16)[name = string("transpose_19")]; + tensor var_7558_cast_fp16 = expand_dims(axes = var_7558_axes_0, x = var_7556_cast_fp16)[name = string("op_7558_cast_fp16")]; + string q_133_pad_type_0 = const()[name = string("q_133_pad_type_0"), val = string("valid")]; + tensor q_133_strides_0 = const()[name = string("q_133_strides_0"), val = tensor([1, 1])]; + tensor q_133_pad_0 = const()[name = string("q_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_133_dilations_0 = const()[name = string("q_133_dilations_0"), val = tensor([1, 1])]; + int32 q_133_groups_0 = const()[name = string("q_133_groups_0"), val = int32(1)]; + tensor q_133 = conv(dilations = q_133_dilations_0, groups = q_133_groups_0, pad = q_133_pad_0, pad_type = q_133_pad_type_0, strides = q_133_strides_0, weight = layers_11_self_attn_q_proj_weight_palettized, x = var_7558_cast_fp16)[name = string("q_133")]; + tensor var_7579 = const()[name = string("op_7579"), val = tensor([1, 8, 512, 3])]; + tensor var_7580 = reshape(shape = var_7579, x = q_133)[name = string("op_7580")]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_7603 = const()[name = string("op_7603"), val = tensor([3, 8, 512])]; + tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = var_7580)[name = string("transpose_18")]; + tensor x_221 = reshape(shape = var_7603, x = transpose_81)[name = string("x_221")]; + int32 var_7609 = const()[name = string("op_7609"), val = int32(-1)]; + fp16 const_131_promoted = const()[name = string("const_131_promoted"), val = fp16(-0x1p+0)]; + tensor var_7611 = mul(x = x_221, y = const_131_promoted)[name = string("op_7611")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331 = concat(axis = var_7609, interleave = 
input_331_interleave_0, values = (x_221, var_7611))[name = string("input_331")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_7606_to_fp16 = const()[name = string("op_7606_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_7606_to_fp16, x = input_331)[name = string("normed_313_cast_fp16")]; + tensor var_7616_split_sizes_0 = const()[name = string("op_7616_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7616_axis_0 = const()[name = string("op_7616_axis_0"), val = int32(-1)]; + tensor var_7616_0, tensor var_7616_1 = split(axis = var_7616_axis_0, split_sizes = var_7616_split_sizes_0, x = normed_313_cast_fp16)[name = string("op_7616")]; + tensor q_137 = mul(x = var_7616_0, y = layers_11_self_attn_q_norm_weight)[name = string("q_137")]; + tensor var_7623 = const()[name = string("op_7623"), val = tensor([1, 3, 8, 512])]; + tensor var_7624 = reshape(shape = var_7623, x = q_137)[name = string("op_7624")]; + tensor var_7629 = const()[name = string("op_7629"), val = tensor([0, 2, 1, 3])]; + tensor q_139 = transpose(perm = var_7629, x = var_7624)[name = string("transpose_17")]; + tensor var_7631_cast_fp16 = mul(x = q_139, y = cos_f)[name = string("op_7631_cast_fp16")]; + tensor var_7632_split_sizes_0 = const()[name = string("op_7632_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7632_axis_0 = const()[name = string("op_7632_axis_0"), val = int32(-1)]; + tensor var_7632_0, tensor var_7632_1 = split(axis = var_7632_axis_0, split_sizes = var_7632_split_sizes_0, x = q_139)[name = string("op_7632")]; + fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; + tensor var_7634 = mul(x = var_7632_1, y = const_132_promoted)[name = string("op_7634")]; + int32 var_7636 = const()[name = string("op_7636"), val = int32(-1)]; + bool var_7637_interleave_0 = const()[name = string("op_7637_interleave_0"), val = 
bool(false)]; + tensor var_7637 = concat(axis = var_7636, interleave = var_7637_interleave_0, values = (var_7634, var_7632_0))[name = string("op_7637")]; + tensor var_7638_cast_fp16 = mul(x = var_7637, y = sin_f)[name = string("op_7638_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_7631_cast_fp16, y = var_7638_cast_fp16)[name = string("q_cast_fp16")]; + string k_69_pad_type_0 = const()[name = string("k_69_pad_type_0"), val = string("valid")]; + tensor k_69_strides_0 = const()[name = string("k_69_strides_0"), val = tensor([1, 1])]; + tensor k_69_pad_0 = const()[name = string("k_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor k_69_dilations_0 = const()[name = string("k_69_dilations_0"), val = tensor([1, 1])]; + int32 k_69_groups_0 = const()[name = string("k_69_groups_0"), val = int32(1)]; + tensor k_69 = conv(dilations = k_69_dilations_0, groups = k_69_groups_0, pad = k_69_pad_0, pad_type = k_69_pad_type_0, strides = k_69_strides_0, weight = layers_11_self_attn_k_proj_weight_palettized, x = var_7558_cast_fp16)[name = string("k_69")]; + tensor var_7656 = const()[name = string("op_7656"), val = tensor([1, 2, 512, 3])]; + tensor var_7657 = reshape(shape = var_7656, x = k_69)[name = string("op_7657")]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 3, 1, 2])]; + string v_25_pad_type_0 = const()[name = string("v_25_pad_type_0"), val = string("valid")]; + tensor v_25_strides_0 = const()[name = string("v_25_strides_0"), val = tensor([1, 1])]; + tensor v_25_pad_0 = const()[name = string("v_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor v_25_dilations_0 = const()[name = string("v_25_dilations_0"), val = tensor([1, 1])]; + int32 v_25_groups_0 = const()[name = string("v_25_groups_0"), val = int32(1)]; + tensor v_25 = conv(dilations = v_25_dilations_0, groups = v_25_groups_0, pad = v_25_pad_0, pad_type = v_25_pad_type_0, strides = v_25_strides_0, weight = layers_11_self_attn_v_proj_weight_palettized, x = 
var_7558_cast_fp16)[name = string("v_25")]; + tensor var_7684 = const()[name = string("op_7684"), val = tensor([1, 2, 512, 3])]; + tensor var_7685 = reshape(shape = var_7684, x = v_25)[name = string("op_7685")]; + tensor var_7690 = const()[name = string("op_7690"), val = tensor([0, 1, 3, 2])]; + tensor var_7708 = const()[name = string("op_7708"), val = tensor([3, 2, 512])]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = var_7657)[name = string("transpose_16")]; + tensor x_223 = reshape(shape = var_7708, x = transpose_82)[name = string("x_223")]; + int32 var_7714 = const()[name = string("op_7714"), val = int32(-1)]; + fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; + tensor var_7716 = mul(x = x_223, y = const_133_promoted)[name = string("op_7716")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333 = concat(axis = var_7714, interleave = input_333_interleave_0, values = (x_223, var_7716))[name = string("input_333")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_7711_to_fp16 = const()[name = string("op_7711_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_7711_to_fp16, x = input_333)[name = string("normed_317_cast_fp16")]; + tensor var_7721_split_sizes_0 = const()[name = string("op_7721_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7721_axis_0 = const()[name = string("op_7721_axis_0"), val = int32(-1)]; + tensor var_7721_0, tensor var_7721_1 = split(axis = var_7721_axis_0, split_sizes = var_7721_split_sizes_0, x = normed_317_cast_fp16)[name = string("op_7721")]; + tensor k_73 = mul(x = var_7721_0, y = layers_11_self_attn_k_norm_weight)[name = string("k_73")]; + tensor var_7728 = const()[name = string("op_7728"), val = tensor([1, 3, 2, 512])]; + tensor var_7729 = reshape(shape = var_7728, x = 
k_73)[name = string("op_7729")]; + tensor var_7734 = const()[name = string("op_7734"), val = tensor([0, 2, 1, 3])]; + fp16 var_7736_promoted = const()[name = string("op_7736_promoted"), val = fp16(0x1p+1)]; + tensor var_7691 = transpose(perm = var_7690, x = var_7685)[name = string("transpose_15")]; + tensor var_7737 = pow(x = var_7691, y = var_7736_promoted)[name = string("op_7737")]; + tensor var_7742_axes_0 = const()[name = string("op_7742_axes_0"), val = tensor([-1])]; + bool var_7742_keep_dims_0 = const()[name = string("op_7742_keep_dims_0"), val = bool(true)]; + tensor var_7742 = reduce_mean(axes = var_7742_axes_0, keep_dims = var_7742_keep_dims_0, x = var_7737)[name = string("op_7742")]; + fp16 var_7744_to_fp16 = const()[name = string("op_7744_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_cast_fp16 = add(x = var_7742, y = var_7744_to_fp16)[name = string("mean_sq_cast_fp16")]; + fp32 var_7746_epsilon_0 = const()[name = string("op_7746_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7746_cast_fp16 = rsqrt(epsilon = var_7746_epsilon_0, x = mean_sq_cast_fp16)[name = string("op_7746_cast_fp16")]; + tensor v_cast_fp16 = mul(x = var_7691, y = var_7746_cast_fp16)[name = string("v_cast_fp16")]; + tensor q_141 = transpose(perm = var_7734, x = var_7729)[name = string("transpose_14")]; + tensor var_7748_cast_fp16 = mul(x = q_141, y = cos_f)[name = string("op_7748_cast_fp16")]; + tensor var_7749_split_sizes_0 = const()[name = string("op_7749_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7749_axis_0 = const()[name = string("op_7749_axis_0"), val = int32(-1)]; + tensor var_7749_0, tensor var_7749_1 = split(axis = var_7749_axis_0, split_sizes = var_7749_split_sizes_0, x = q_141)[name = string("op_7749")]; + fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; + tensor var_7751 = mul(x = var_7749_1, y = const_134_promoted)[name = string("op_7751")]; + int32 var_7753 = const()[name = string("op_7753"), val = 
int32(-1)]; + bool var_7754_interleave_0 = const()[name = string("op_7754_interleave_0"), val = bool(false)]; + tensor var_7754 = concat(axis = var_7753, interleave = var_7754_interleave_0, values = (var_7751, var_7749_0))[name = string("op_7754")]; + tensor var_7755_cast_fp16 = mul(x = var_7754, y = sin_f)[name = string("op_7755_cast_fp16")]; + tensor k_cast_fp16 = add(x = var_7748_cast_fp16, y = var_7755_cast_fp16)[name = string("k_cast_fp16")]; + bool k_scattered_transpose_x_0 = const()[name = string("k_scattered_transpose_x_0"), val = bool(false)]; + bool k_scattered_transpose_y_0 = const()[name = string("k_scattered_transpose_y_0"), val = bool(false)]; + tensor k_scattered_cast_fp16 = matmul(transpose_x = k_scattered_transpose_x_0, transpose_y = k_scattered_transpose_y_0, x = var_4055_cast_fp16, y = k_cast_fp16)[name = string("k_scattered_cast_fp16")]; + bool v_scattered_transpose_x_0 = const()[name = string("v_scattered_transpose_x_0"), val = bool(false)]; + bool v_scattered_transpose_y_0 = const()[name = string("v_scattered_transpose_y_0"), val = bool(false)]; + tensor v_scattered_cast_fp16 = matmul(transpose_x = v_scattered_transpose_x_0, transpose_y = v_scattered_transpose_y_0, x = var_4055_cast_fp16, y = v_cast_fp16)[name = string("v_scattered_cast_fp16")]; + tensor slot_k_begin_0 = const()[name = string("slot_k_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_k_end_0 = const()[name = string("slot_k_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_k_end_mask_0 = const()[name = string("slot_k_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_k_cast_fp16 = slice_by_index(begin = slot_k_begin_0, end = slot_k_end_0, end_mask = slot_k_end_mask_0, x = K_full_out_1_cast_fp16)[name = string("slot_k_cast_fp16")]; + tensor slot_v_begin_0 = const()[name = string("slot_v_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor slot_v_end_0 = const()[name = string("slot_v_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor slot_v_end_mask_0 = 
const()[name = string("slot_v_end_mask_0"), val = tensor([true, true, true, true])]; + tensor slot_v_cast_fp16 = slice_by_index(begin = slot_v_begin_0, end = slot_v_end_0, end_mask = slot_v_end_mask_0, x = V_full_out_1_cast_fp16)[name = string("slot_v_cast_fp16")]; + tensor var_7792_cast_fp16 = mul(x = slot_k_cast_fp16, y = var_4082_cast_fp16)[name = string("op_7792_cast_fp16")]; + tensor new_k_cast_fp16 = add(x = var_7792_cast_fp16, y = k_scattered_cast_fp16)[name = string("new_k_cast_fp16")]; + tensor var_7798_cast_fp16 = mul(x = slot_v_cast_fp16, y = var_4082_cast_fp16)[name = string("op_7798_cast_fp16")]; + tensor new_v_cast_fp16 = add(x = var_7798_cast_fp16, y = v_scattered_cast_fp16)[name = string("new_v_cast_fp16")]; + int32 var_7812 = const()[name = string("op_7812"), val = int32(0)]; + bool K_full_out_interleave_0 = const()[name = string("K_full_out_interleave_0"), val = bool(false)]; + tensor K_full_out = concat(axis = var_7812, interleave = K_full_out_interleave_0, values = (var_4122_cast_fp16, new_k_cast_fp16))[name = string("K_full_out_cast_fp16")]; + int32 var_7825 = const()[name = string("op_7825"), val = int32(0)]; + bool V_full_out_interleave_0 = const()[name = string("V_full_out_interleave_0"), val = bool(false)]; + tensor V_full_out = concat(axis = var_7825, interleave = V_full_out_interleave_0, values = (var_4132_cast_fp16, new_v_cast_fp16))[name = string("V_full_out_cast_fp16")]; + tensor var_7831_begin_0 = const()[name = string("op_7831_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_7831_end_0 = const()[name = string("op_7831_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_7831_end_mask_0 = const()[name = string("op_7831_end_mask_0"), val = tensor([true, true, true, true])]; + tensor kv14_k = slice_by_index(begin = var_7831_begin_0, end = var_7831_end_0, end_mask = var_7831_end_mask_0, x = K_full_out)[name = string("op_7831_cast_fp16")]; + tensor var_7841_begin_0 = const()[name = string("op_7841_begin_0"), val = tensor([1, 0, 0, 
0])]; + tensor var_7841_end_0 = const()[name = string("op_7841_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_7841_end_mask_0 = const()[name = string("op_7841_end_mask_0"), val = tensor([true, true, true, true])]; + tensor kv14_v = slice_by_index(begin = var_7841_begin_0, end = var_7841_end_0, end_mask = var_7841_end_mask_0, x = V_full_out)[name = string("op_7841_cast_fp16")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_22_reps_0 = const()[name = string("tile_22_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = kv14_k)[name = string("transpose_13")]; + tensor tile_22_cast_fp16 = tile(reps = tile_22_reps_0, x = transpose_44_cast_fp16)[name = string("tile_22_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_48, x = tile_22_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_12")]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_49, x = transpose_45_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_23_reps_0 = const()[name = string("tile_23_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_46_cast_fp16 = transpose(perm = transpose_46_perm_0, x = kv14_v)[name = string("transpose_11")]; + tensor tile_23_cast_fp16 = tile(reps = tile_23_reps_0, x = 
transpose_46_cast_fp16)[name = string("tile_23_cast_fp16")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_50, x = tile_23_cast_fp16)[name = string("reshape_46_cast_fp16")]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_51 = const()[name = string("concat_51"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_46_cast_fp16)[name = string("transpose_10")]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_51, x = transpose_47_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor V_expanded_perm_0 = const()[name = string("V_expanded_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_45_transpose_x_0 = const()[name = string("attn_weights_45_transpose_x_0"), val = bool(false)]; + bool attn_weights_45_transpose_y_0 = const()[name = string("attn_weights_45_transpose_y_0"), val = bool(false)]; + tensor transpose_83_cast_fp16 = transpose(perm = transpose_83_perm_0, x = reshape_45_cast_fp16)[name = string("transpose_9")]; + tensor attn_weights_45_cast_fp16 = matmul(transpose_x = attn_weights_45_transpose_x_0, transpose_y = attn_weights_45_transpose_y_0, x = q_cast_fp16, y = transpose_83_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor x_227_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask_full)[name = string("x_227_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor reduce_max_11 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_227_cast_fp16)[name = string("reduce_max_11")]; + tensor var_7896 = sub(x = x_227_cast_fp16, y = reduce_max_11)[name = string("op_7896")]; + 
tensor var_7902 = exp(x = var_7896)[name = string("op_7902")]; + tensor var_7912_axes_0 = const()[name = string("op_7912_axes_0"), val = tensor([-1])]; + bool var_7912_keep_dims_0 = const()[name = string("op_7912_keep_dims_0"), val = bool(true)]; + tensor var_7912 = reduce_sum(axes = var_7912_axes_0, keep_dims = var_7912_keep_dims_0, x = var_7902)[name = string("op_7912")]; + tensor var_7918_cast_fp16 = real_div(x = var_7902, y = var_7912)[name = string("op_7918_cast_fp16")]; + bool attn_output_67_transpose_x_0 = const()[name = string("attn_output_67_transpose_x_0"), val = bool(false)]; + bool attn_output_67_transpose_y_0 = const()[name = string("attn_output_67_transpose_y_0"), val = bool(false)]; + tensor V_expanded_cast_fp16 = transpose(perm = V_expanded_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_8")]; + tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_0, transpose_y = attn_output_67_transpose_y_0, x = var_7918_cast_fp16, y = V_expanded_cast_fp16)[name = string("attn_output_67_cast_fp16")]; + tensor var_7929 = const()[name = string("op_7929"), val = tensor([0, 2, 1, 3])]; + tensor var_7936 = const()[name = string("op_7936"), val = tensor([1, 3, -1])]; + tensor var_7930_cast_fp16 = transpose(perm = var_7929, x = attn_output_67_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_69_cast_fp16 = reshape(shape = var_7936, x = var_7930_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_7941 = const()[name = string("op_7941"), val = tensor([0, 2, 1])]; + string var_7957_pad_type_0 = const()[name = string("op_7957_pad_type_0"), val = string("valid")]; + int32 var_7957_groups_0 = const()[name = string("op_7957_groups_0"), val = int32(1)]; + tensor var_7957_strides_0 = const()[name = string("op_7957_strides_0"), val = tensor([1])]; + tensor var_7957_pad_0 = const()[name = string("op_7957_pad_0"), val = tensor([0, 0])]; + tensor var_7957_dilations_0 = const()[name = string("op_7957_dilations_0"), 
val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566605504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571848448))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7942_cast_fp16 = transpose(perm = var_7941, x = attn_output_69_cast_fp16)[name = string("transpose_6")]; + tensor var_7957_cast_fp16 = conv(dilations = var_7957_dilations_0, groups = var_7957_groups_0, pad = var_7957_pad_0, pad_type = var_7957_pad_type_0, strides = var_7957_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7942_cast_fp16)[name = string("op_7957_cast_fp16")]; + tensor var_7961 = const()[name = string("op_7961"), val = tensor([0, 2, 1])]; + int32 var_7967 = const()[name = string("op_7967"), val = int32(-1)]; + fp16 const_135_promoted_to_fp16 = const()[name = string("const_135_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_231_cast_fp16 = transpose(perm = var_7961, x = var_7957_cast_fp16)[name = string("transpose_5")]; + tensor var_7969_cast_fp16 = mul(x = x_231_cast_fp16, y = const_135_promoted_to_fp16)[name = string("op_7969_cast_fp16")]; + bool input_337_interleave_0 = const()[name = string("input_337_interleave_0"), val = bool(false)]; + tensor input_337_cast_fp16 = concat(axis = var_7967, interleave = input_337_interleave_0, values = (x_231_cast_fp16, var_7969_cast_fp16))[name = string("input_337_cast_fp16")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_7964_to_fp16 = const()[name = string("op_7964_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_7964_to_fp16, x = input_337_cast_fp16)[name = string("normed_321_cast_fp16")]; + tensor var_7974_split_sizes_0 = const()[name = string("op_7974_split_sizes_0"), 
val = tensor([2560, 2560])]; + int32 var_7974_axis_0 = const()[name = string("op_7974_axis_0"), val = int32(-1)]; + tensor var_7974_cast_fp16_0, tensor var_7974_cast_fp16_1 = split(axis = var_7974_axis_0, split_sizes = var_7974_split_sizes_0, x = normed_321_cast_fp16)[name = string("op_7974_cast_fp16")]; + tensor layers_11_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571851072)))]; + tensor attn_output_cast_fp16 = mul(x = var_7974_cast_fp16_0, y = layers_11_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_233_cast_fp16 = add(x = x_219_cast_fp16, y = attn_output_cast_fp16)[name = string("x_233_cast_fp16")]; + int32 var_7983 = const()[name = string("op_7983"), val = int32(-1)]; + fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7985_cast_fp16 = mul(x = x_233_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_7985_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_7983, interleave = input_339_interleave_0, values = (x_233_cast_fp16, var_7985_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_7980_to_fp16 = const()[name = string("op_7980_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_7980_to_fp16, x = input_339_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor var_7990_split_sizes_0 = const()[name = string("op_7990_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7990_axis_0 = const()[name = string("op_7990_axis_0"), val = int32(-1)]; + tensor 
var_7990_cast_fp16_0, tensor var_7990_cast_fp16_1 = split(axis = var_7990_axis_0, split_sizes = var_7990_split_sizes_0, x = normed_325_cast_fp16)[name = string("op_7990_cast_fp16")]; + tensor layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571856256)))]; + tensor h_69_cast_fp16 = mul(x = var_7990_cast_fp16_0, y = layers_11_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_69_cast_fp16")]; + tensor var_8001 = const()[name = string("op_8001"), val = tensor([0, 2, 1])]; + tensor input_341_axes_0 = const()[name = string("input_341_axes_0"), val = tensor([2])]; + tensor var_8002 = transpose(perm = var_8001, x = h_69_cast_fp16)[name = string("transpose_4")]; + tensor input_341 = expand_dims(axes = input_341_axes_0, x = var_8002)[name = string("input_341")]; + string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; + tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; + tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; + int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; + tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_11_mlp_gate_proj_weight_palettized, x = input_341)[name = string("gate_45")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name 
= string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_11_mlp_up_proj_weight_palettized, x = input_341)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_45)[name = string("gate")]; + tensor input_343 = mul(x = gate, y = up)[name = string("input_343")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_11_mlp_down_proj_weight_palettized, x = input_343)[name = string("mlp_out")]; + tensor var_8042_axes_0 = const()[name = string("op_8042_axes_0"), val = tensor([2])]; + tensor var_8042 = squeeze(axes = var_8042_axes_0, x = mlp_out)[name = string("op_8042")]; + tensor var_8046 = const()[name = string("op_8046"), val = tensor([0, 2, 1])]; + int32 var_8052 = const()[name = string("op_8052"), val = int32(-1)]; + fp16 const_137_promoted = const()[name = string("const_137_promoted"), val = fp16(-0x1p+0)]; + tensor x_235 = transpose(perm = var_8046, x = var_8042)[name = string("transpose_3")]; + tensor var_8054 = mul(x = x_235, y = const_137_promoted)[name = string("op_8054")]; + bool input_345_interleave_0 = const()[name = 
string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_8052, interleave = input_345_interleave_0, values = (x_235, var_8054))[name = string("input_345")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_8049_to_fp16 = const()[name = string("op_8049_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_8049_to_fp16, x = input_345)[name = string("normed_329_cast_fp16")]; + tensor var_8059_split_sizes_0 = const()[name = string("op_8059_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8059_axis_0 = const()[name = string("op_8059_axis_0"), val = int32(-1)]; + tensor var_8059_0, tensor var_8059_1 = split(axis = var_8059_axis_0, split_sizes = var_8059_split_sizes_0, x = normed_329_cast_fp16)[name = string("op_8059")]; + tensor hidden_states_113 = mul(x = var_8059_0, y = layers_11_post_feedforward_layernorm_weight)[name = string("hidden_states_113")]; + tensor hidden_states_115_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_113)[name = string("hidden_states_115_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 5888])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 3, 6144])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_8087 = const()[name = string("op_8087"), val = tensor([0, 2, 1])]; + tensor input_347_axes_0 = const()[name = string("input_347_axes_0"), val = tensor([2])]; + tensor var_8088 = transpose(perm = var_8087, x = hidden_states_115_cast_fp16)[name = 
string("transpose_2")]; + tensor input_347 = expand_dims(axes = input_347_axes_0, x = var_8088)[name = string("input_347")]; + string gated_67_pad_type_0 = const()[name = string("gated_67_pad_type_0"), val = string("valid")]; + tensor gated_67_strides_0 = const()[name = string("gated_67_strides_0"), val = tensor([1, 1])]; + tensor gated_67_pad_0 = const()[name = string("gated_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_67_dilations_0 = const()[name = string("gated_67_dilations_0"), val = tensor([1, 1])]; + int32 gated_67_groups_0 = const()[name = string("gated_67_groups_0"), val = int32(1)]; + tensor gated_67 = conv(dilations = gated_67_dilations_0, groups = gated_67_groups_0, pad = gated_67_pad_0, pad_type = gated_67_pad_type_0, strides = gated_67_strides_0, weight = layers_11_per_layer_input_gate_weight_palettized, x = input_347)[name = string("gated_67")]; + string gated_69_mode_0 = const()[name = string("gated_69_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_69 = gelu(mode = gated_69_mode_0, x = gated_67)[name = string("gated_69")]; + tensor var_8107 = const()[name = string("op_8107"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_8108_cast_fp16 = transpose(perm = var_8107, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_8108_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_349_cast_fp16 = mul(x = gated_69, y = per_layer_slice_conv_cast_fp16)[name = string("input_349_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name 
= string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_11_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571861440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572189184))))[name = string("layers_11_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_11_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_349_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_8124_axes_0 = const()[name = string("op_8124_axes_0"), val = tensor([2])]; + tensor var_8124_cast_fp16 = squeeze(axes = var_8124_axes_0, x = gated_cast_fp16)[name = string("op_8124_cast_fp16")]; + tensor var_8128 = const()[name = string("op_8128"), val = tensor([0, 2, 1])]; + int32 var_8134 = const()[name = string("op_8134"), val = int32(-1)]; + fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_8128, x = var_8124_cast_fp16)[name = string("transpose_0")]; + tensor var_8136_cast_fp16 = mul(x = x_cast_fp16, y = const_138_promoted_to_fp16)[name = string("op_8136_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_8134, interleave = input_interleave_0, values = (x_cast_fp16, var_8136_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_8131_to_fp16 = const()[name = string("op_8131_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = 
layer_norm(axes = normed_333_axes_0, epsilon = var_8131_to_fp16, x = input_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor var_8141_split_sizes_0 = const()[name = string("op_8141_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8141_axis_0 = const()[name = string("op_8141_axis_0"), val = int32(-1)]; + tensor var_8141_cast_fp16_0, tensor var_8141_cast_fp16_1 = split(axis = var_8141_axis_0, split_sizes = var_8141_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_8141_cast_fp16")]; + tensor layers_11_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_11_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572191808)))]; + tensor hidden_states_119_cast_fp16 = mul(x = var_8141_cast_fp16_0, y = layers_11_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = tensor([0x1.0cp-4])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_139_promoted_to_fp16)[name = string("op_8151_cast_fp16")]; + } -> (hidden_states_out, K_sliding_out, V_sliding_out, K_full_out, V_full_out, kv13_k, kv13_v, kv14_k, kv14_v); +} \ No newline at end of file diff --git a/chunk2.mlmodelc/weights/weight.bin b/chunk2.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..f8045f8f733b5dc1a6d7f1a7af51616c898f200d --- /dev/null +++ b/chunk2.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6cd92e3945e5e809a15df7a1d9e648fb651e859d733d9589eba817805e2d96d +size 572196992 diff --git a/chunk2_3way.mlmodelc/analytics/coremldata.bin 
b/chunk2_3way.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..f60ff7a72ea4dd66c7a0676779b6bc8b8c360445 --- /dev/null +++ b/chunk2_3way.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223a79744041af35a291271aca045883b40f5cc88ad1fb9040a2ee0a2a5b25b9 +size 243 diff --git a/chunk2_3way.mlmodelc/coremldata.bin b/chunk2_3way.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b39ce2e01c235a36ebcc7235d33b5057c27ec648 --- /dev/null +++ b/chunk2_3way.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476686c14666d2a23a5e04271a9bb1f2ce006c76ac085370b9f10fe90a05c810 +size 979 diff --git a/chunk2_3way.mlmodelc/metadata.json b/chunk2_3way.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..33391944fd34a8cf16e1b72070ee0274a55a4b18 --- /dev/null +++ b/chunk2_3way.mlmodelc/metadata.json @@ -0,0 +1,285 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), Palettized (9 bits), UInt4)", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 2560)", + "shortDescription" : "", + "shape" : "[1, 1, 2560]", + "name" : "hidden_states_out", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)", + "shortDescription" : "", + "shape" : "[10, 2, 512, 512]", + "name" : "K_sliding_out", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)", + "shortDescription" : "", + "shape" : "[10, 2, 512, 
512]", + "name" : "V_sliding_out", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[2, 2, 2048, 512]", + "name" : "K_full_out", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[2, 2, 2048, 512]", + "name" : "V_full_out", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)", + "shortDescription" : "", + "shape" : "[1, 2, 512, 256]", + "name" : "kv13_k", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)", + "shortDescription" : "", + "shape" : "[1, 2, 512, 256]", + "name" : "kv13_v", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[1, 2, 2048, 512]", + "name" : "kv14_k", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[1, 2, 2048, 512]", + "name" : "kv14_v", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.expandDims" : 108, + "Ios18.mul" : 457, + "Ios18.matmul" : 42, + "Ios18.rsqrt" : 12, + "Ios18.exp" : 21, + "Ios16.reduceMean" : 12, + "Ios18.realDiv" : 21, + "Split" : 171, + "Ios16.reduceMax" : 21, + "Tile" : 28, + "Ios18.add" : 133, + "Ios16.reduceSum" : 21, + "Ios18.layerNorm" 
: 138, + "Ios18.reshape" : 180, + "Pad" : 20, + "Ios18.constexprLutToDense" : 171, + "Ios18.conv" : 171, + "Ios18.concat" : 191, + "Ios18.transpose" : 306, + "Ios18.sub" : 22, + "Ios18.pow" : 12, + "Ios18.gelu" : 42, + "Stack" : 4, + "Ios18.sliceByIndex" : 85, + "Ios18.squeeze" : 66 + }, + "computePrecision" : "Mixed (Float16, Float32, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.conversion_date" : "2026-04-30", + "com.github.apple.coremltools.source" : "torch==2.11.0", + "com.github.apple.coremltools.version" : "9.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 2560)", + "shortDescription" : "", + "shape" : "[1, 1, 2560]", + "name" : "hidden_states", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 2048]", + "name" : "causal_mask_full", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 512]", + "name" : "causal_mask_sliding", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 1)", + "shortDescription" : "", + "shape" : "[1, 1, 2048, 1]", + "name" : "update_mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + 
"dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 10752)", + "shortDescription" : "", + "shape" : "[1, 1, 10752]", + "name" : "per_layer_combined", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 256]", + "name" : "cos_s", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 256]", + "name" : "sin_s", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 512]", + "name" : "cos_f", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 512]", + "name" : "sin_f", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)", + "shortDescription" : "", + "shape" : "[10, 2, 512, 512]", + "name" : "K_sliding_in", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)", + "shortDescription" : "", + "shape" : "[10, 2, 512, 512]", + "name" : "V_sliding_in", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[2, 2, 2048, 512]", + "name" : "K_full_in", + "type" : "MultiArray" + }, + { + 
"hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[2, 2, 2048, 512]", + "name" : "V_full_in", + "type" : "MultiArray" + } + ], + "generatedClassName" : "chunk2_3way", + "method" : "predict" + } +] \ No newline at end of file diff --git a/chunk2_3way.mlmodelc/model.mil b/chunk2_3way.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..4bca220b2305808e8f867a57f71ff4f06053db2b --- /dev/null +++ b/chunk2_3way.mlmodelc/model.mil @@ -0,0 +1,5936 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func main(tensor K_full_in, tensor K_sliding_in, tensor V_full_in, tensor V_sliding_in, tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor per_layer_combined, tensor sin_f, tensor sin_s, tensor update_mask) { + tensor layers_c2_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_c2_0_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_0_self_attn_q_norm_weight = const()[name = string("layers_c2_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_c2_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3279680))))[name = string("layers_c2_0_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3280256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3935680))))[name = string("layers_c2_0_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_0_self_attn_k_norm_weight = const()[name = string("layers_c2_0_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3936256)))]; + tensor layers_c2_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3936832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17044096))))[name = string("layers_c2_0_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17054400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30161664))))[name = string("layers_c2_0_mlp_up_proj_weight_palettized")]; + tensor layers_c2_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30171968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43279232))))[name = string("layers_c2_0_mlp_down_proj_weight_palettized")]; + tensor layers_c2_0_post_feedforward_layernorm_weight = const()[name = string("layers_c2_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43281856)))]; + tensor layers_c2_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43287040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(43614784))))[name = string("layers_c2_0_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43615104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46236608))))[name = string("layers_c2_1_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_1_self_attn_q_norm_weight = const()[name = string("layers_c2_1_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46238720)))]; + tensor layers_c2_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46239296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46894720))))[name = string("layers_c2_1_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46895296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47550720))))[name = string("layers_c2_1_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_1_self_attn_k_norm_weight = const()[name = string("layers_c2_1_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47551296)))]; + tensor layers_c2_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47551872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60659136))))[name = string("layers_c2_1_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60669440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73776704))))[name = string("layers_c2_1_mlp_up_proj_weight_palettized")]; + tensor layers_c2_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73787008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86894272))))[name = string("layers_c2_1_mlp_down_proj_weight_palettized")]; + tensor layers_c2_1_post_feedforward_layernorm_weight = const()[name = string("layers_c2_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86896896)))]; + tensor layers_c2_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86902080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87229824))))[name = string("layers_c2_1_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87230144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89851648))))[name = string("layers_c2_2_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_2_self_attn_q_norm_weight = const()[name = string("layers_c2_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89853760)))]; + tensor layers_c2_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89854336))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(90509760))))[name = string("layers_c2_2_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90510336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91165760))))[name = string("layers_c2_2_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_2_self_attn_k_norm_weight = const()[name = string("layers_c2_2_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91166336)))]; + tensor layers_c2_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91166912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104274176))))[name = string("layers_c2_2_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104284480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117391744))))[name = string("layers_c2_2_mlp_up_proj_weight_palettized")]; + tensor layers_c2_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117402048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130509312))))[name = string("layers_c2_2_mlp_down_proj_weight_palettized")]; + tensor layers_c2_2_post_feedforward_layernorm_weight = const()[name = string("layers_c2_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130511936)))]; + tensor 
layers_c2_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130517120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130844864))))[name = string("layers_c2_2_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130845184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133466688))))[name = string("layers_c2_3_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_3_self_attn_q_norm_weight = const()[name = string("layers_c2_3_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133468800)))]; + tensor layers_c2_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133469376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134124800))))[name = string("layers_c2_3_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134125376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134780800))))[name = string("layers_c2_3_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_3_self_attn_k_norm_weight = const()[name = string("layers_c2_3_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134781376)))]; + tensor layers_c2_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(134781952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147889216))))[name = string("layers_c2_3_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147899520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161006784))))[name = string("layers_c2_3_mlp_up_proj_weight_palettized")]; + tensor layers_c2_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161017088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174124352))))[name = string("layers_c2_3_mlp_down_proj_weight_palettized")]; + tensor layers_c2_3_post_feedforward_layernorm_weight = const()[name = string("layers_c2_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174126976)))]; + tensor layers_c2_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174132160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174459904))))[name = string("layers_c2_3_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174460224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177081728))))[name = string("layers_c2_4_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_4_self_attn_q_norm_weight = const()[name = string("layers_c2_4_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(177083840)))]; + tensor layers_c2_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177084416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177739840))))[name = string("layers_c2_4_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177740416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178395840))))[name = string("layers_c2_4_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_4_self_attn_k_norm_weight = const()[name = string("layers_c2_4_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178396416)))]; + tensor layers_c2_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178396992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191504256))))[name = string("layers_c2_4_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191514560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204621824))))[name = string("layers_c2_4_mlp_up_proj_weight_palettized")]; + tensor layers_c2_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204632128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217739392))))[name = string("layers_c2_4_mlp_down_proj_weight_palettized")]; + tensor 
layers_c2_4_post_feedforward_layernorm_weight = const()[name = string("layers_c2_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217742016)))]; + tensor layers_c2_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217747200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218074944))))[name = string("layers_c2_4_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218075264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223318208))))[name = string("layers_c2_5_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_5_self_attn_q_norm_weight = const()[name = string("layers_c2_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223322368)))]; + tensor layers_c2_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223323456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224634240))))[name = string("layers_c2_5_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224635328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225946112))))[name = string("layers_c2_5_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_5_self_attn_k_norm_weight = const()[name = string("layers_c2_5_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(225947200)))]; + tensor layers_c2_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225948288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239055552))))[name = string("layers_c2_5_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239065856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252173120))))[name = string("layers_c2_5_mlp_up_proj_weight_palettized")]; + tensor layers_c2_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252183424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265290688))))[name = string("layers_c2_5_mlp_down_proj_weight_palettized")]; + tensor layers_c2_5_post_feedforward_layernorm_weight = const()[name = string("layers_c2_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265293312)))]; + tensor layers_c2_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265298496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265626240))))[name = string("layers_c2_5_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265626560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268248064))))[name = 
string("layers_c2_6_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268250176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268905600))))[name = string("layers_c2_6_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268906176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269561600))))[name = string("layers_c2_6_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_6_self_attn_k_norm_weight = const()[name = string("layers_c2_6_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269562176)))]; + tensor layers_c2_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269562752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282670016))))[name = string("layers_c2_6_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295787584))))[name = string("layers_c2_6_mlp_up_proj_weight_palettized")]; + tensor layers_c2_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295797888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308905152))))[name = 
string("layers_c2_6_mlp_down_proj_weight_palettized")]; + tensor layers_c2_6_post_feedforward_layernorm_weight = const()[name = string("layers_c2_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308907776)))]; + tensor layers_c2_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308912960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309240704))))[name = string("layers_c2_6_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309241024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311862528))))[name = string("layers_c2_7_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_7_self_attn_q_norm_weight = const()[name = string("layers_c2_7_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311864640)))]; + tensor layers_c2_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311865216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312520640))))[name = string("layers_c2_7_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312521216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313176640))))[name = string("layers_c2_7_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_7_self_attn_k_norm_weight = const()[name = 
string("layers_c2_7_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313177216)))]; + tensor layers_c2_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313177792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326285056))))[name = string("layers_c2_7_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326295360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339402624))))[name = string("layers_c2_7_mlp_up_proj_weight_palettized")]; + tensor layers_c2_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339412928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352520192))))[name = string("layers_c2_7_mlp_down_proj_weight_palettized")]; + tensor layers_c2_7_post_feedforward_layernorm_weight = const()[name = string("layers_c2_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352522816)))]; + tensor layers_c2_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352528000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352855744))))[name = string("layers_c2_7_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352856064))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(355477568))))[name = string("layers_c2_8_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355479680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356135104))))[name = string("layers_c2_8_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356135680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356791104))))[name = string("layers_c2_8_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_8_self_attn_k_norm_weight = const()[name = string("layers_c2_8_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356791680)))]; + tensor layers_c2_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356792256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369899520))))[name = string("layers_c2_8_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369909824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383017088))))[name = string("layers_c2_8_mlp_up_proj_weight_palettized")]; + tensor layers_c2_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383027392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(396134656))))[name = string("layers_c2_8_mlp_down_proj_weight_palettized")]; + tensor layers_c2_8_post_feedforward_layernorm_weight = const()[name = string("layers_c2_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396137280)))]; + tensor layers_c2_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396142464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396470208))))[name = string("layers_c2_8_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396470528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399092032))))[name = string("layers_c2_9_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_9_self_attn_q_norm_weight = const()[name = string("layers_c2_9_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399094144)))]; + tensor layers_c2_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399094720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399750144))))[name = string("layers_c2_9_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399750720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400406144))))[name = string("layers_c2_9_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_9_self_attn_k_norm_weight 
= const()[name = string("layers_c2_9_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400406720)))]; + tensor layers_c2_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400407296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413514560))))[name = string("layers_c2_9_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413524864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426632128))))[name = string("layers_c2_9_mlp_up_proj_weight_palettized")]; + tensor layers_c2_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426642432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439749696))))[name = string("layers_c2_9_mlp_down_proj_weight_palettized")]; + tensor layers_c2_9_post_feedforward_layernorm_weight = const()[name = string("layers_c2_9_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439752320)))]; + tensor layers_c2_9_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(439757504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440085248))))[name = string("layers_c2_9_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440085568))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442707072))))[name = string("layers_c2_10_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_10_self_attn_q_norm_weight = const()[name = string("layers_c2_10_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442709184)))]; + tensor layers_c2_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442709760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443365184))))[name = string("layers_c2_10_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444021184))))[name = string("layers_c2_10_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444021760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457129024))))[name = string("layers_c2_10_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457139328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470246592))))[name = string("layers_c2_10_mlp_up_proj_weight_palettized")]; + tensor layers_c2_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470256896))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(483364160))))[name = string("layers_c2_10_mlp_down_proj_weight_palettized")]; + tensor layers_c2_10_post_feedforward_layernorm_weight = const()[name = string("layers_c2_10_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483366784)))]; + tensor layers_c2_10_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483371968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483699712))))[name = string("layers_c2_10_per_layer_input_gate_weight_palettized")]; + tensor layers_c2_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(483700032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488942976))))[name = string("layers_c2_11_self_attn_q_proj_weight_palettized")]; + tensor layers_c2_11_self_attn_q_norm_weight = const()[name = string("layers_c2_11_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488947136)))]; + tensor layers_c2_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488948224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490259008))))[name = string("layers_c2_11_self_attn_k_proj_weight_palettized")]; + tensor layers_c2_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490260096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491570880))))[name = 
string("layers_c2_11_self_attn_v_proj_weight_palettized")]; + tensor layers_c2_11_self_attn_k_norm_weight = const()[name = string("layers_c2_11_self_attn_k_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491571968)))]; + tensor layers_c2_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491573056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504680320))))[name = string("layers_c2_11_mlp_gate_proj_weight_palettized")]; + tensor layers_c2_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504690624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517797888))))[name = string("layers_c2_11_mlp_up_proj_weight_palettized")]; + tensor layers_c2_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517808192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530915456))))[name = string("layers_c2_11_mlp_down_proj_weight_palettized")]; + tensor layers_c2_11_post_feedforward_layernorm_weight = const()[name = string("layers_c2_11_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530918080)))]; + tensor layers_c2_11_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530923264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531251008))))[name = string("layers_c2_11_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_0_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531251328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533872832))))[name = string("layers_c3_0_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(533874944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546982208))))[name = string("layers_c3_0_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546992512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560099776))))[name = string("layers_c3_0_mlp_up_proj_weight_palettized")]; + tensor layers_c3_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560110080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573217344))))[name = string("layers_c3_0_mlp_down_proj_weight_palettized")]; + tensor layers_c3_0_post_feedforward_layernorm_weight = const()[name = string("layers_c3_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573219968)))]; + tensor layers_c3_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573225152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573552896))))[name = string("layers_c3_0_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_1_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573553216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576174720))))[name = string("layers_c3_1_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(576176832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589284096))))[name = string("layers_c3_1_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589294400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(602401664))))[name = string("layers_c3_1_mlp_up_proj_weight_palettized")]; + tensor layers_c3_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(602411968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615519232))))[name = string("layers_c3_1_mlp_down_proj_weight_palettized")]; + tensor layers_c3_1_post_feedforward_layernorm_weight = const()[name = string("layers_c3_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615521856)))]; + tensor layers_c3_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615527040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615854784))))[name = string("layers_c3_1_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_2_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615855104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618476608))))[name = string("layers_c3_2_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618478720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631585984))))[name = string("layers_c3_2_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631596288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(644703552))))[name = string("layers_c3_2_mlp_up_proj_weight_palettized")]; + tensor layers_c3_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(644713856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657821120))))[name = string("layers_c3_2_mlp_down_proj_weight_palettized")]; + tensor layers_c3_2_post_feedforward_layernorm_weight = const()[name = string("layers_c3_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657823744)))]; + tensor layers_c3_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657828928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658156672))))[name = string("layers_c3_2_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_3_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658156992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660778496))))[name = string("layers_c3_3_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660780608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673887872))))[name = string("layers_c3_3_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673898176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687005440))))[name = string("layers_c3_3_mlp_up_proj_weight_palettized")]; + tensor layers_c3_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(687015744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700123008))))[name = string("layers_c3_3_mlp_down_proj_weight_palettized")]; + tensor layers_c3_3_post_feedforward_layernorm_weight = const()[name = string("layers_c3_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700125632)))]; + tensor layers_c3_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700130816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700458560))))[name = string("layers_c3_3_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_4_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700458880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(703080384))))[name = string("layers_c3_4_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(703082496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716189760))))[name = string("layers_c3_4_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716200064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729307328))))[name = string("layers_c3_4_mlp_up_proj_weight_palettized")]; + tensor layers_c3_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729317632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742424896))))[name = string("layers_c3_4_mlp_down_proj_weight_palettized")]; + tensor layers_c3_4_post_feedforward_layernorm_weight = const()[name = string("layers_c3_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742427520)))]; + tensor layers_c3_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742432704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742760448))))[name = string("layers_c3_4_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_5_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(742760768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748003712))))[name = string("layers_c3_5_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748007872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761115136))))[name = string("layers_c3_5_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761125440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774232704))))[name = string("layers_c3_5_mlp_up_proj_weight_palettized")]; + tensor layers_c3_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774243008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787350272))))[name = string("layers_c3_5_mlp_down_proj_weight_palettized")]; + tensor layers_c3_5_post_feedforward_layernorm_weight = const()[name = string("layers_c3_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787352896)))]; + tensor layers_c3_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787358080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787685824))))[name = string("layers_c3_5_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_6_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787686144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790307648))))[name = string("layers_c3_6_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790309760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803417024))))[name = string("layers_c3_6_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(803427328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816534592))))[name = string("layers_c3_6_mlp_up_proj_weight_palettized")]; + tensor layers_c3_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816544896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829652160))))[name = string("layers_c3_6_mlp_down_proj_weight_palettized")]; + tensor layers_c3_6_post_feedforward_layernorm_weight = const()[name = string("layers_c3_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829654784)))]; + tensor layers_c3_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829659968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829987712))))[name = string("layers_c3_6_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_7_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(829988032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832609536))))[name = string("layers_c3_7_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(832611648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845718912))))[name = string("layers_c3_7_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845729216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858836480))))[name = string("layers_c3_7_mlp_up_proj_weight_palettized")]; + tensor layers_c3_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858846784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871954048))))[name = string("layers_c3_7_mlp_down_proj_weight_palettized")]; + tensor layers_c3_7_post_feedforward_layernorm_weight = const()[name = string("layers_c3_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871956672)))]; + tensor layers_c3_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871961856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872289600))))[name = string("layers_c3_7_per_layer_input_gate_weight_palettized")]; + tensor layers_c3_8_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872289920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874911424))))[name = string("layers_c3_8_self_attn_q_proj_weight_palettized")]; + tensor layers_c3_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874913536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(888020800))))[name = string("layers_c3_8_mlp_gate_proj_weight_palettized")]; + tensor layers_c3_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(888031104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(901138368))))[name = string("layers_c3_8_mlp_up_proj_weight_palettized")]; + tensor layers_c3_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(901148672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914255936))))[name = string("layers_c3_8_mlp_down_proj_weight_palettized")]; + tensor layers_c3_8_post_feedforward_layernorm_weight = const()[name = string("layers_c3_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914258560)))]; + tensor layers_c3_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914263744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914591488))))[name = string("layers_c3_8_per_layer_input_gate_weight_palettized")]; + tensor var_1168_begin_0 = const()[name = string("op_1168_begin_0"), val = 
tensor([0, 0, 0, 0])]; + tensor var_1168_end_0 = const()[name = string("op_1168_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1168_end_mask_0 = const()[name = string("op_1168_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1168_squeeze_mask_0 = const()[name = string("op_1168_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1168_cast_fp16 = slice_by_index(begin = var_1168_begin_0, end = var_1168_end_0, end_mask = var_1168_end_mask_0, squeeze_mask = var_1168_squeeze_mask_0, x = K_sliding_in)[name = string("op_1168_cast_fp16")]; + tensor K_sliding_slot_1_axes_0 = const()[name = string("K_sliding_slot_1_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_1_cast_fp16 = expand_dims(axes = K_sliding_slot_1_axes_0, x = var_1168_cast_fp16)[name = string("K_sliding_slot_1_cast_fp16")]; + tensor var_1173_begin_0 = const()[name = string("op_1173_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1173_end_0 = const()[name = string("op_1173_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1173_end_mask_0 = const()[name = string("op_1173_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1173_squeeze_mask_0 = const()[name = string("op_1173_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1173_cast_fp16 = slice_by_index(begin = var_1173_begin_0, end = var_1173_end_0, end_mask = var_1173_end_mask_0, squeeze_mask = var_1173_squeeze_mask_0, x = V_sliding_in)[name = string("op_1173_cast_fp16")]; + tensor V_sliding_slot_1_axes_0 = const()[name = string("V_sliding_slot_1_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_1_cast_fp16 = expand_dims(axes = V_sliding_slot_1_axes_0, x = var_1173_cast_fp16)[name = string("V_sliding_slot_1_cast_fp16")]; + int32 var_1180 = const()[name = string("op_1180"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1182_cast_fp16 = mul(x = hidden_states, y 
= const_0_promoted_to_fp16)[name = string("op_1182_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_1180, interleave = input_1_interleave_0, values = (hidden_states, var_1182_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1177_to_fp16 = const()[name = string("op_1177_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1177_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_1187_split_sizes_0 = const()[name = string("op_1187_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1187_axis_0 = const()[name = string("op_1187_axis_0"), val = int32(-1)]; + tensor var_1187_cast_fp16_0, tensor var_1187_cast_fp16_1 = split(axis = var_1187_axis_0, split_sizes = var_1187_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_1187_cast_fp16")]; + tensor layers_c2_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914591808)))]; + tensor h_1_cast_fp16 = mul(x = var_1187_cast_fp16_0, y = layers_c2_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_1193 = const()[name = string("op_1193"), val = tensor([0, 2, 1])]; + tensor var_1196_axes_0 = const()[name = string("op_1196_axes_0"), val = tensor([2])]; + tensor var_1194_cast_fp16 = transpose(perm = var_1193, x = h_1_cast_fp16)[name = string("transpose_305")]; + tensor var_1196_cast_fp16 = expand_dims(axes = var_1196_axes_0, x = var_1194_cast_fp16)[name = string("op_1196_cast_fp16")]; + string var_1212_pad_type_0 = const()[name = string("op_1212_pad_type_0"), val = string("valid")]; + tensor var_1212_strides_0 = const()[name = 
string("op_1212_strides_0"), val = tensor([1, 1])]; + tensor var_1212_pad_0 = const()[name = string("op_1212_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1212_dilations_0 = const()[name = string("op_1212_dilations_0"), val = tensor([1, 1])]; + int32 var_1212_groups_0 = const()[name = string("op_1212_groups_0"), val = int32(1)]; + tensor var_1212 = conv(dilations = var_1212_dilations_0, groups = var_1212_groups_0, pad = var_1212_pad_0, pad_type = var_1212_pad_type_0, strides = var_1212_strides_0, weight = layers_c2_0_self_attn_q_proj_weight_palettized, x = var_1196_cast_fp16)[name = string("op_1212")]; + tensor var_1217 = const()[name = string("op_1217"), val = tensor([1, 8, 256, 1])]; + tensor var_1218 = reshape(shape = var_1217, x = var_1212)[name = string("op_1218")]; + tensor var_1223 = const()[name = string("op_1223"), val = tensor([0, 1, 3, 2])]; + tensor var_1233 = const()[name = string("op_1233"), val = tensor([1, 8, 256])]; + tensor var_1224 = transpose(perm = var_1223, x = var_1218)[name = string("transpose_304")]; + tensor x_1 = reshape(shape = var_1233, x = var_1224)[name = string("x_1")]; + int32 var_1239 = const()[name = string("op_1239"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_1241 = mul(x = x_1, y = const_1_promoted)[name = string("op_1241")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1239, interleave = input_5_interleave_0, values = (x_1, var_1241))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1236_to_fp16 = const()[name = string("op_1236_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1236_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_1246_split_sizes_0 = const()[name = 
string("op_1246_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1246_axis_0 = const()[name = string("op_1246_axis_0"), val = int32(-1)]; + tensor var_1246_0, tensor var_1246_1 = split(axis = var_1246_axis_0, split_sizes = var_1246_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_1246")]; + tensor var_1248 = mul(x = var_1246_0, y = layers_c2_0_self_attn_q_norm_weight)[name = string("op_1248")]; + tensor var_1253 = const()[name = string("op_1253"), val = tensor([1, 8, 1, 256])]; + tensor q_3 = reshape(shape = var_1253, x = var_1248)[name = string("q_3")]; + tensor var_1255_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_1255_cast_fp16")]; + tensor var_1256_split_sizes_0 = const()[name = string("op_1256_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1256_axis_0 = const()[name = string("op_1256_axis_0"), val = int32(-1)]; + tensor var_1256_0, tensor var_1256_1 = split(axis = var_1256_axis_0, split_sizes = var_1256_split_sizes_0, x = q_3)[name = string("op_1256")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_1258 = mul(x = var_1256_1, y = const_2_promoted)[name = string("op_1258")]; + int32 var_1260 = const()[name = string("op_1260"), val = int32(-1)]; + bool var_1261_interleave_0 = const()[name = string("op_1261_interleave_0"), val = bool(false)]; + tensor var_1261 = concat(axis = var_1260, interleave = var_1261_interleave_0, values = (var_1258, var_1256_0))[name = string("op_1261")]; + tensor var_1262_cast_fp16 = mul(x = var_1261, y = sin_s)[name = string("op_1262_cast_fp16")]; + tensor q_7_cast_fp16 = add(x = var_1255_cast_fp16, y = var_1262_cast_fp16)[name = string("q_7_cast_fp16")]; + string var_1275_pad_type_0 = const()[name = string("op_1275_pad_type_0"), val = string("valid")]; + tensor var_1275_strides_0 = const()[name = string("op_1275_strides_0"), val = tensor([1, 1])]; + tensor var_1275_pad_0 = const()[name = string("op_1275_pad_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_1275_dilations_0 = const()[name = string("op_1275_dilations_0"), val = tensor([1, 1])]; + int32 var_1275_groups_0 = const()[name = string("op_1275_groups_0"), val = int32(1)]; + tensor var_1275 = conv(dilations = var_1275_dilations_0, groups = var_1275_groups_0, pad = var_1275_pad_0, pad_type = var_1275_pad_type_0, strides = var_1275_strides_0, weight = layers_c2_0_self_attn_k_proj_weight_palettized, x = var_1196_cast_fp16)[name = string("op_1275")]; + tensor var_1280 = const()[name = string("op_1280"), val = tensor([1, 2, 256, 1])]; + tensor var_1281 = reshape(shape = var_1280, x = var_1275)[name = string("op_1281")]; + tensor var_1286 = const()[name = string("op_1286"), val = tensor([0, 1, 3, 2])]; + string var_1303_pad_type_0 = const()[name = string("op_1303_pad_type_0"), val = string("valid")]; + tensor var_1303_strides_0 = const()[name = string("op_1303_strides_0"), val = tensor([1, 1])]; + tensor var_1303_pad_0 = const()[name = string("op_1303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1303_dilations_0 = const()[name = string("op_1303_dilations_0"), val = tensor([1, 1])]; + int32 var_1303_groups_0 = const()[name = string("op_1303_groups_0"), val = int32(1)]; + tensor var_1303 = conv(dilations = var_1303_dilations_0, groups = var_1303_groups_0, pad = var_1303_pad_0, pad_type = var_1303_pad_type_0, strides = var_1303_strides_0, weight = layers_c2_0_self_attn_v_proj_weight_palettized, x = var_1196_cast_fp16)[name = string("op_1303")]; + tensor var_1308 = const()[name = string("op_1308"), val = tensor([1, 2, 256, 1])]; + tensor var_1309 = reshape(shape = var_1308, x = var_1303)[name = string("op_1309")]; + tensor var_1314 = const()[name = string("op_1314"), val = tensor([0, 1, 3, 2])]; + tensor var_1324 = const()[name = string("op_1324"), val = tensor([1, 2, 256])]; + tensor var_1287 = transpose(perm = var_1286, x = var_1281)[name = string("transpose_303")]; + tensor x_3 = reshape(shape = var_1324, x = var_1287)[name = string("x_3")]; + int32 
var_1330 = const()[name = string("op_1330"), val = int32(-1)]; + fp16 const_3_promoted = const()[name = string("const_3_promoted"), val = fp16(-0x1p+0)]; + tensor var_1332 = mul(x = x_3, y = const_3_promoted)[name = string("op_1332")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1330, interleave = input_7_interleave_0, values = (x_3, var_1332))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1327_to_fp16 = const()[name = string("op_1327_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1327_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor var_1337_split_sizes_0 = const()[name = string("op_1337_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1337_axis_0 = const()[name = string("op_1337_axis_0"), val = int32(-1)]; + tensor var_1337_0, tensor var_1337_1 = split(axis = var_1337_axis_0, split_sizes = var_1337_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_1337")]; + tensor var_1339 = mul(x = var_1337_0, y = layers_c2_0_self_attn_k_norm_weight)[name = string("op_1339")]; + tensor var_1344 = const()[name = string("op_1344"), val = tensor([1, 2, 1, 256])]; + tensor q_5 = reshape(shape = var_1344, x = var_1339)[name = string("q_5")]; + fp16 var_1346_promoted = const()[name = string("op_1346_promoted"), val = fp16(0x1p+1)]; + tensor var_1315 = transpose(perm = var_1314, x = var_1309)[name = string("transpose_302")]; + tensor var_1347 = pow(x = var_1315, y = var_1346_promoted)[name = string("op_1347")]; + tensor var_1352_axes_0 = const()[name = string("op_1352_axes_0"), val = tensor([-1])]; + bool var_1352_keep_dims_0 = const()[name = string("op_1352_keep_dims_0"), val = bool(true)]; + tensor var_1352 = reduce_mean(axes = var_1352_axes_0, keep_dims = var_1352_keep_dims_0, x = var_1347)[name = 
string("op_1352")]; + fp16 var_1354_to_fp16 = const()[name = string("op_1354_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_1_cast_fp16 = add(x = var_1352, y = var_1354_to_fp16)[name = string("mean_sq_1_cast_fp16")]; + fp32 var_1356_epsilon_0 = const()[name = string("op_1356_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1356_cast_fp16 = rsqrt(epsilon = var_1356_epsilon_0, x = mean_sq_1_cast_fp16)[name = string("op_1356_cast_fp16")]; + tensor input_11_cast_fp16 = mul(x = var_1315, y = var_1356_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor var_1358_cast_fp16 = mul(x = q_5, y = cos_s)[name = string("op_1358_cast_fp16")]; + tensor var_1359_split_sizes_0 = const()[name = string("op_1359_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1359_axis_0 = const()[name = string("op_1359_axis_0"), val = int32(-1)]; + tensor var_1359_0, tensor var_1359_1 = split(axis = var_1359_axis_0, split_sizes = var_1359_split_sizes_0, x = q_5)[name = string("op_1359")]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_1361 = mul(x = var_1359_1, y = const_4_promoted)[name = string("op_1361")]; + int32 var_1363 = const()[name = string("op_1363"), val = int32(-1)]; + bool var_1364_interleave_0 = const()[name = string("op_1364_interleave_0"), val = bool(false)]; + tensor var_1364 = concat(axis = var_1363, interleave = var_1364_interleave_0, values = (var_1361, var_1359_0))[name = string("op_1364")]; + tensor var_1365_cast_fp16 = mul(x = var_1364, y = sin_s)[name = string("op_1365_cast_fp16")]; + tensor input_9_cast_fp16 = add(x = var_1358_cast_fp16, y = var_1365_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor k_padded_1_pad_0 = const()[name = string("k_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_1_mode_0 = const()[name = string("k_padded_1_mode_0"), val = string("constant")]; + fp16 const_5_to_fp16 = const()[name = string("const_5_to_fp16"), val = fp16(0x0p+0)]; + 
tensor k_padded_1_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = k_padded_1_mode_0, pad = k_padded_1_pad_0, x = input_9_cast_fp16)[name = string("k_padded_1_cast_fp16")]; + tensor v_padded_1_pad_0 = const()[name = string("v_padded_1_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_1_mode_0 = const()[name = string("v_padded_1_mode_0"), val = string("constant")]; + fp16 const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_1_cast_fp16 = pad(constant_val = const_6_to_fp16, mode = v_padded_1_mode_0, pad = v_padded_1_pad_0, x = input_11_cast_fp16)[name = string("v_padded_1_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = string("op_1394_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1394_end_0 = const()[name = string("op_1394_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1394_end_mask_0 = const()[name = string("op_1394_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = K_sliding_slot_1_cast_fp16)[name = string("op_1394_cast_fp16")]; + int32 var_1401 = const()[name = string("op_1401"), val = int32(2)]; + bool K_sliding_out_1_interleave_0 = const()[name = string("K_sliding_out_1_interleave_0"), val = bool(false)]; + tensor K_sliding_out_1_cast_fp16 = concat(axis = var_1401, interleave = K_sliding_out_1_interleave_0, values = (var_1394_cast_fp16, k_padded_1_cast_fp16))[name = string("K_sliding_out_1_cast_fp16")]; + tensor var_1417_begin_0 = const()[name = string("op_1417_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1417_end_0 = const()[name = string("op_1417_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1417_end_mask_0 = const()[name = string("op_1417_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1417_cast_fp16 = slice_by_index(begin = var_1417_begin_0, end = var_1417_end_0, end_mask = var_1417_end_mask_0, x 
= V_sliding_slot_1_cast_fp16)[name = string("op_1417_cast_fp16")]; + int32 var_1424 = const()[name = string("op_1424"), val = int32(2)]; + bool V_sliding_out_1_interleave_0 = const()[name = string("V_sliding_out_1_interleave_0"), val = bool(false)]; + tensor V_sliding_out_1_cast_fp16 = concat(axis = var_1424, interleave = V_sliding_out_1_interleave_0, values = (var_1417_cast_fp16, v_padded_1_cast_fp16))[name = string("V_sliding_out_1_cast_fp16")]; + tensor K_for_attn_1_begin_0 = const()[name = string("K_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_1_end_0 = const()[name = string("K_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_1_end_mask_0 = const()[name = string("K_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_1_cast_fp16 = slice_by_index(begin = K_for_attn_1_begin_0, end = K_for_attn_1_end_0, end_mask = K_for_attn_1_end_mask_0, x = K_sliding_out_1_cast_fp16)[name = string("K_for_attn_1_cast_fp16")]; + tensor V_for_attn_1_begin_0 = const()[name = string("V_for_attn_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_1_end_0 = const()[name = string("V_for_attn_1_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_1_end_mask_0 = const()[name = string("V_for_attn_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_1_cast_fp16 = slice_by_index(begin = V_for_attn_1_begin_0, end = V_for_attn_1_end_0, end_mask = V_for_attn_1_end_mask_0, x = V_sliding_out_1_cast_fp16)[name = string("V_for_attn_1_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = K_for_attn_1_cast_fp16)[name = string("transpose_301")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = 
string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_300")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = V_for_attn_1_cast_fp16)[name = string("transpose_299")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_298")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, 
-1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_84_cast_fp16 = transpose(perm = transpose_84_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_297")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_7_cast_fp16, y = transpose_84_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_7_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_7_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_7_cast_fp16)[name = string("reduce_max_0")]; + tensor var_1465 = sub(x = x_7_cast_fp16, y = reduce_max_0)[name = string("op_1465")]; + tensor var_1471 = exp(x = var_1465)[name = string("op_1471")]; + tensor var_1481_axes_0 = const()[name = string("op_1481_axes_0"), val = tensor([-1])]; + bool var_1481_keep_dims_0 = const()[name = string("op_1481_keep_dims_0"), val = bool(true)]; + tensor var_1481 = reduce_sum(axes = var_1481_axes_0, keep_dims = var_1481_keep_dims_0, x = var_1471)[name = string("op_1481")]; + tensor var_1487_cast_fp16 = real_div(x = var_1471, y = var_1481)[name = string("op_1487_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_296")]; + tensor 
attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_1487_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_1498 = const()[name = string("op_1498"), val = tensor([0, 2, 1, 3])]; + tensor var_1505 = const()[name = string("op_1505"), val = tensor([1, 1, -1])]; + tensor var_1499_cast_fp16 = transpose(perm = var_1498, x = attn_output_1_cast_fp16)[name = string("transpose_295")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_1505, x = var_1499_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_1510 = const()[name = string("op_1510"), val = tensor([0, 2, 1])]; + string var_1526_pad_type_0 = const()[name = string("op_1526_pad_type_0"), val = string("valid")]; + int32 var_1526_groups_0 = const()[name = string("op_1526_groups_0"), val = int32(1)]; + tensor var_1526_strides_0 = const()[name = string("op_1526_strides_0"), val = tensor([1])]; + tensor var_1526_pad_0 = const()[name = string("op_1526_pad_0"), val = tensor([0, 0])]; + tensor var_1526_dilations_0 = const()[name = string("op_1526_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(914596992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917218496))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1511_cast_fp16 = transpose(perm = var_1510, x = attn_output_3_cast_fp16)[name = string("transpose_294")]; + tensor var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1511_cast_fp16)[name = string("op_1526_cast_fp16")]; + tensor var_1530 = const()[name = string("op_1530"), 
val = tensor([0, 2, 1])]; + int32 var_1536 = const()[name = string("op_1536"), val = int32(-1)]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_11_cast_fp16 = transpose(perm = var_1530, x = var_1526_cast_fp16)[name = string("transpose_293")]; + tensor var_1538_cast_fp16 = mul(x = x_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_1538_cast_fp16")]; + bool input_15_interleave_0 = const()[name = string("input_15_interleave_0"), val = bool(false)]; + tensor input_15_cast_fp16 = concat(axis = var_1536, interleave = input_15_interleave_0, values = (x_11_cast_fp16, var_1538_cast_fp16))[name = string("input_15_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_1533_to_fp16 = const()[name = string("op_1533_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_1533_to_fp16, x = input_15_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_1543_split_sizes_0 = const()[name = string("op_1543_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1543_axis_0 = const()[name = string("op_1543_axis_0"), val = int32(-1)]; + tensor var_1543_cast_fp16_0, tensor var_1543_cast_fp16_1 = split(axis = var_1543_axis_0, split_sizes = var_1543_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_1543_cast_fp16")]; + tensor layers_c2_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917221120)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_1543_cast_fp16_0, y = layers_c2_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_13_cast_fp16")]; + int32 
var_1552 = const()[name = string("op_1552"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1554_cast_fp16 = mul(x = x_13_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_1554_cast_fp16")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17_cast_fp16 = concat(axis = var_1552, interleave = input_17_interleave_0, values = (x_13_cast_fp16, var_1554_cast_fp16))[name = string("input_17_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_1549_to_fp16 = const()[name = string("op_1549_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_1549_to_fp16, x = input_17_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor var_1559_split_sizes_0 = const()[name = string("op_1559_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1559_axis_0 = const()[name = string("op_1559_axis_0"), val = int32(-1)]; + tensor var_1559_cast_fp16_0, tensor var_1559_cast_fp16_1 = split(axis = var_1559_axis_0, split_sizes = var_1559_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_1559_cast_fp16")]; + tensor layers_c2_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917226304)))]; + tensor h_3_cast_fp16 = mul(x = var_1559_cast_fp16_0, y = layers_c2_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_1570 = const()[name = string("op_1570"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_1571 = transpose(perm = var_1570, x = h_3_cast_fp16)[name = string("transpose_292")]; + tensor input_19 = 
expand_dims(axes = input_19_axes_0, x = var_1571)[name = string("input_19")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_c2_0_mlp_gate_proj_weight_palettized, x = input_19)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_c2_0_mlp_up_proj_weight_palettized, x = input_19)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_21 = mul(x = gate_3, y = up_1)[name = string("input_21")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = 
string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_c2_0_mlp_down_proj_weight_palettized, x = input_21)[name = string("mlp_out_1")]; + tensor var_1611_axes_0 = const()[name = string("op_1611_axes_0"), val = tensor([2])]; + tensor var_1611 = squeeze(axes = var_1611_axes_0, x = mlp_out_1)[name = string("op_1611")]; + tensor var_1615 = const()[name = string("op_1615"), val = tensor([0, 2, 1])]; + int32 var_1621 = const()[name = string("op_1621"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor x_15 = transpose(perm = var_1615, x = var_1611)[name = string("transpose_291")]; + tensor var_1623 = mul(x = x_15, y = const_9_promoted)[name = string("op_1623")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23 = concat(axis = var_1621, interleave = input_23_interleave_0, values = (x_15, var_1623))[name = string("input_23")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_1618_to_fp16 = const()[name = string("op_1618_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_1618_to_fp16, x = input_23)[name = string("normed_21_cast_fp16")]; + tensor var_1628_split_sizes_0 = const()[name = string("op_1628_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1628_axis_0 = const()[name = string("op_1628_axis_0"), val = int32(-1)]; + tensor var_1628_0, tensor var_1628_1 = split(axis = var_1628_axis_0, split_sizes = var_1628_split_sizes_0, x = 
normed_21_cast_fp16)[name = string("op_1628")]; + tensor hidden_states_3 = mul(x = var_1628_0, y = layers_c2_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_13_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 3072])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 3328])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_1656 = const()[name = string("op_1656"), val = tensor([0, 2, 1])]; + tensor input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor([2])]; + tensor var_1657 = transpose(perm = var_1656, x = hidden_states_5_cast_fp16)[name = string("transpose_290")]; + tensor input_25 = expand_dims(axes = input_25_axes_0, x = var_1657)[name = string("input_25")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_c2_0_per_layer_input_gate_weight_palettized, x = 
input_25)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_1676 = const()[name = string("op_1676"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_1677_cast_fp16 = transpose(perm = var_1676, x = per_layer_slice_1_cast_fp16)[name = string("transpose_289")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_1677_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_27_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_27_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_c2_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917231488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917559232))))[name = string("layers_c2_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_c2_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_27_cast_fp16)[name = 
string("gated_5_cast_fp16")]; + tensor var_1693_axes_0 = const()[name = string("op_1693_axes_0"), val = tensor([2])]; + tensor var_1693_cast_fp16 = squeeze(axes = var_1693_axes_0, x = gated_5_cast_fp16)[name = string("op_1693_cast_fp16")]; + tensor var_1697 = const()[name = string("op_1697"), val = tensor([0, 2, 1])]; + int32 var_1703 = const()[name = string("op_1703"), val = int32(-1)]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_17_cast_fp16 = transpose(perm = var_1697, x = var_1693_cast_fp16)[name = string("transpose_288")]; + tensor var_1705_cast_fp16 = mul(x = x_17_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_1705_cast_fp16")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29_cast_fp16 = concat(axis = var_1703, interleave = input_29_interleave_0, values = (x_17_cast_fp16, var_1705_cast_fp16))[name = string("input_29_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_1700_to_fp16 = const()[name = string("op_1700_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_1700_to_fp16, x = input_29_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_1710_split_sizes_0 = const()[name = string("op_1710_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1710_axis_0 = const()[name = string("op_1710_axis_0"), val = int32(-1)]; + tensor var_1710_cast_fp16_0, tensor var_1710_cast_fp16_1 = split(axis = var_1710_axis_0, split_sizes = var_1710_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_1710_cast_fp16")]; + tensor layers_c2_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917561856)))]; + 
tensor hidden_states_9_cast_fp16 = mul(x = var_1710_cast_fp16_0, y = layers_c2_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = tensor([0x1.7ep-1])]; + tensor x_19_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_11_promoted_to_fp16)[name = string("x_19_cast_fp16")]; + tensor var_1722_axes_0 = const()[name = string("op_1722_axes_0"), val = tensor([0])]; + tensor var_1722_cast_fp16 = squeeze(axes = var_1722_axes_0, x = K_sliding_out_1_cast_fp16)[name = string("op_1722_cast_fp16")]; + tensor var_1724_axes_0 = const()[name = string("op_1724_axes_0"), val = tensor([0])]; + tensor var_1724_cast_fp16 = squeeze(axes = var_1724_axes_0, x = V_sliding_out_1_cast_fp16)[name = string("op_1724_cast_fp16")]; + tensor var_1727_begin_0 = const()[name = string("op_1727_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_1727_end_0 = const()[name = string("op_1727_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1727_end_mask_0 = const()[name = string("op_1727_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1727_squeeze_mask_0 = const()[name = string("op_1727_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1727_cast_fp16 = slice_by_index(begin = var_1727_begin_0, end = var_1727_end_0, end_mask = var_1727_end_mask_0, squeeze_mask = var_1727_squeeze_mask_0, x = K_sliding_in)[name = string("op_1727_cast_fp16")]; + tensor K_sliding_slot_3_axes_0 = const()[name = string("K_sliding_slot_3_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_3_cast_fp16 = expand_dims(axes = K_sliding_slot_3_axes_0, x = var_1727_cast_fp16)[name = string("K_sliding_slot_3_cast_fp16")]; + tensor var_1732_begin_0 = const()[name = string("op_1732_begin_0"), 
val = tensor([1, 0, 0, 0])]; + tensor var_1732_end_0 = const()[name = string("op_1732_end_0"), val = tensor([2, 2, 512, 512])]; + tensor var_1732_end_mask_0 = const()[name = string("op_1732_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1732_squeeze_mask_0 = const()[name = string("op_1732_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_1732_cast_fp16 = slice_by_index(begin = var_1732_begin_0, end = var_1732_end_0, end_mask = var_1732_end_mask_0, squeeze_mask = var_1732_squeeze_mask_0, x = V_sliding_in)[name = string("op_1732_cast_fp16")]; + tensor V_sliding_slot_3_axes_0 = const()[name = string("V_sliding_slot_3_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_3_cast_fp16 = expand_dims(axes = V_sliding_slot_3_axes_0, x = var_1732_cast_fp16)[name = string("V_sliding_slot_3_cast_fp16")]; + int32 var_1739 = const()[name = string("op_1739"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1741_cast_fp16 = mul(x = x_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1741_cast_fp16")]; + bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; + tensor input_31_cast_fp16 = concat(axis = var_1739, interleave = input_31_interleave_0, values = (x_19_cast_fp16, var_1741_cast_fp16))[name = string("input_31_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_1736_to_fp16 = const()[name = string("op_1736_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_1736_to_fp16, x = input_31_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor var_1746_split_sizes_0 = const()[name = string("op_1746_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1746_axis_0 = const()[name = string("op_1746_axis_0"), val = int32(-1)]; + tensor var_1746_cast_fp16_0, 
tensor var_1746_cast_fp16_1 = split(axis = var_1746_axis_0, split_sizes = var_1746_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_1746_cast_fp16")]; + tensor layers_c2_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917567040)))]; + tensor h_7_cast_fp16 = mul(x = var_1746_cast_fp16_0, y = layers_c2_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_1752 = const()[name = string("op_1752"), val = tensor([0, 2, 1])]; + tensor var_1755_axes_0 = const()[name = string("op_1755_axes_0"), val = tensor([2])]; + tensor var_1753_cast_fp16 = transpose(perm = var_1752, x = h_7_cast_fp16)[name = string("transpose_287")]; + tensor var_1755_cast_fp16 = expand_dims(axes = var_1755_axes_0, x = var_1753_cast_fp16)[name = string("op_1755_cast_fp16")]; + string var_1771_pad_type_0 = const()[name = string("op_1771_pad_type_0"), val = string("valid")]; + tensor var_1771_strides_0 = const()[name = string("op_1771_strides_0"), val = tensor([1, 1])]; + tensor var_1771_pad_0 = const()[name = string("op_1771_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1771_dilations_0 = const()[name = string("op_1771_dilations_0"), val = tensor([1, 1])]; + int32 var_1771_groups_0 = const()[name = string("op_1771_groups_0"), val = int32(1)]; + tensor var_1771 = conv(dilations = var_1771_dilations_0, groups = var_1771_groups_0, pad = var_1771_pad_0, pad_type = var_1771_pad_type_0, strides = var_1771_strides_0, weight = layers_c2_1_self_attn_q_proj_weight_palettized, x = var_1755_cast_fp16)[name = string("op_1771")]; + tensor var_1776 = const()[name = string("op_1776"), val = tensor([1, 8, 256, 1])]; + tensor var_1777 = reshape(shape = var_1776, x = var_1771)[name = string("op_1777")]; + tensor var_1782 = const()[name = string("op_1782"), val = tensor([0, 1, 3, 2])]; + tensor var_1792 = const()[name 
= string("op_1792"), val = tensor([1, 8, 256])]; + tensor var_1783 = transpose(perm = var_1782, x = var_1777)[name = string("transpose_286")]; + tensor x_21 = reshape(shape = var_1792, x = var_1783)[name = string("x_21")]; + int32 var_1798 = const()[name = string("op_1798"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor var_1800 = mul(x = x_21, y = const_13_promoted)[name = string("op_1800")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35 = concat(axis = var_1798, interleave = input_35_interleave_0, values = (x_21, var_1800))[name = string("input_35")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1795_to_fp16 = const()[name = string("op_1795_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1795_to_fp16, x = input_35)[name = string("normed_33_cast_fp16")]; + tensor var_1805_split_sizes_0 = const()[name = string("op_1805_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1805_axis_0 = const()[name = string("op_1805_axis_0"), val = int32(-1)]; + tensor var_1805_0, tensor var_1805_1 = split(axis = var_1805_axis_0, split_sizes = var_1805_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1805")]; + tensor var_1807 = mul(x = var_1805_0, y = layers_c2_1_self_attn_q_norm_weight)[name = string("op_1807")]; + tensor var_1812 = const()[name = string("op_1812"), val = tensor([1, 8, 1, 256])]; + tensor q_11 = reshape(shape = var_1812, x = var_1807)[name = string("q_11")]; + tensor var_1814_cast_fp16 = mul(x = q_11, y = cos_s)[name = string("op_1814_cast_fp16")]; + tensor var_1815_split_sizes_0 = const()[name = string("op_1815_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1815_axis_0 = const()[name = string("op_1815_axis_0"), val = int32(-1)]; + tensor var_1815_0, tensor var_1815_1 = 
split(axis = var_1815_axis_0, split_sizes = var_1815_split_sizes_0, x = q_11)[name = string("op_1815")]; + fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; + tensor var_1817 = mul(x = var_1815_1, y = const_14_promoted)[name = string("op_1817")]; + int32 var_1819 = const()[name = string("op_1819"), val = int32(-1)]; + bool var_1820_interleave_0 = const()[name = string("op_1820_interleave_0"), val = bool(false)]; + tensor var_1820 = concat(axis = var_1819, interleave = var_1820_interleave_0, values = (var_1817, var_1815_0))[name = string("op_1820")]; + tensor var_1821_cast_fp16 = mul(x = var_1820, y = sin_s)[name = string("op_1821_cast_fp16")]; + tensor q_15_cast_fp16 = add(x = var_1814_cast_fp16, y = var_1821_cast_fp16)[name = string("q_15_cast_fp16")]; + string var_1834_pad_type_0 = const()[name = string("op_1834_pad_type_0"), val = string("valid")]; + tensor var_1834_strides_0 = const()[name = string("op_1834_strides_0"), val = tensor([1, 1])]; + tensor var_1834_pad_0 = const()[name = string("op_1834_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1834_dilations_0 = const()[name = string("op_1834_dilations_0"), val = tensor([1, 1])]; + int32 var_1834_groups_0 = const()[name = string("op_1834_groups_0"), val = int32(1)]; + tensor var_1834 = conv(dilations = var_1834_dilations_0, groups = var_1834_groups_0, pad = var_1834_pad_0, pad_type = var_1834_pad_type_0, strides = var_1834_strides_0, weight = layers_c2_1_self_attn_k_proj_weight_palettized, x = var_1755_cast_fp16)[name = string("op_1834")]; + tensor var_1839 = const()[name = string("op_1839"), val = tensor([1, 2, 256, 1])]; + tensor var_1840 = reshape(shape = var_1839, x = var_1834)[name = string("op_1840")]; + tensor var_1845 = const()[name = string("op_1845"), val = tensor([0, 1, 3, 2])]; + string var_1862_pad_type_0 = const()[name = string("op_1862_pad_type_0"), val = string("valid")]; + tensor var_1862_strides_0 = const()[name = string("op_1862_strides_0"), val = 
tensor([1, 1])]; + tensor var_1862_pad_0 = const()[name = string("op_1862_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1862_dilations_0 = const()[name = string("op_1862_dilations_0"), val = tensor([1, 1])]; + int32 var_1862_groups_0 = const()[name = string("op_1862_groups_0"), val = int32(1)]; + tensor var_1862 = conv(dilations = var_1862_dilations_0, groups = var_1862_groups_0, pad = var_1862_pad_0, pad_type = var_1862_pad_type_0, strides = var_1862_strides_0, weight = layers_c2_1_self_attn_v_proj_weight_palettized, x = var_1755_cast_fp16)[name = string("op_1862")]; + tensor var_1867 = const()[name = string("op_1867"), val = tensor([1, 2, 256, 1])]; + tensor var_1868 = reshape(shape = var_1867, x = var_1862)[name = string("op_1868")]; + tensor var_1873 = const()[name = string("op_1873"), val = tensor([0, 1, 3, 2])]; + tensor var_1883 = const()[name = string("op_1883"), val = tensor([1, 2, 256])]; + tensor var_1846 = transpose(perm = var_1845, x = var_1840)[name = string("transpose_285")]; + tensor x_23 = reshape(shape = var_1883, x = var_1846)[name = string("x_23")]; + int32 var_1889 = const()[name = string("op_1889"), val = int32(-1)]; + fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; + tensor var_1891 = mul(x = x_23, y = const_15_promoted)[name = string("op_1891")]; + bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)]; + tensor input_37 = concat(axis = var_1889, interleave = input_37_interleave_0, values = (x_23, var_1891))[name = string("input_37")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1886_to_fp16 = const()[name = string("op_1886_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1886_to_fp16, x = input_37)[name = string("normed_37_cast_fp16")]; + tensor var_1896_split_sizes_0 = const()[name = string("op_1896_split_sizes_0"), val = 
tensor([256, 256])]; + int32 var_1896_axis_0 = const()[name = string("op_1896_axis_0"), val = int32(-1)]; + tensor var_1896_0, tensor var_1896_1 = split(axis = var_1896_axis_0, split_sizes = var_1896_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1896")]; + tensor var_1898 = mul(x = var_1896_0, y = layers_c2_1_self_attn_k_norm_weight)[name = string("op_1898")]; + tensor var_1903 = const()[name = string("op_1903"), val = tensor([1, 2, 1, 256])]; + tensor q_13 = reshape(shape = var_1903, x = var_1898)[name = string("q_13")]; + fp16 var_1905_promoted = const()[name = string("op_1905_promoted"), val = fp16(0x1p+1)]; + tensor var_1874 = transpose(perm = var_1873, x = var_1868)[name = string("transpose_284")]; + tensor var_1906 = pow(x = var_1874, y = var_1905_promoted)[name = string("op_1906")]; + tensor var_1911_axes_0 = const()[name = string("op_1911_axes_0"), val = tensor([-1])]; + bool var_1911_keep_dims_0 = const()[name = string("op_1911_keep_dims_0"), val = bool(true)]; + tensor var_1911 = reduce_mean(axes = var_1911_axes_0, keep_dims = var_1911_keep_dims_0, x = var_1906)[name = string("op_1911")]; + fp16 var_1913_to_fp16 = const()[name = string("op_1913_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_3_cast_fp16 = add(x = var_1911, y = var_1913_to_fp16)[name = string("mean_sq_3_cast_fp16")]; + fp32 var_1915_epsilon_0 = const()[name = string("op_1915_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1915_cast_fp16 = rsqrt(epsilon = var_1915_epsilon_0, x = mean_sq_3_cast_fp16)[name = string("op_1915_cast_fp16")]; + tensor input_41_cast_fp16 = mul(x = var_1874, y = var_1915_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor var_1917_cast_fp16 = mul(x = q_13, y = cos_s)[name = string("op_1917_cast_fp16")]; + tensor var_1918_split_sizes_0 = const()[name = string("op_1918_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1918_axis_0 = const()[name = string("op_1918_axis_0"), val = int32(-1)]; + tensor var_1918_0, tensor var_1918_1 = 
split(axis = var_1918_axis_0, split_sizes = var_1918_split_sizes_0, x = q_13)[name = string("op_1918")]; + fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; + tensor var_1920 = mul(x = var_1918_1, y = const_16_promoted)[name = string("op_1920")]; + int32 var_1922 = const()[name = string("op_1922"), val = int32(-1)]; + bool var_1923_interleave_0 = const()[name = string("op_1923_interleave_0"), val = bool(false)]; + tensor var_1923 = concat(axis = var_1922, interleave = var_1923_interleave_0, values = (var_1920, var_1918_0))[name = string("op_1923")]; + tensor var_1924_cast_fp16 = mul(x = var_1923, y = sin_s)[name = string("op_1924_cast_fp16")]; + tensor input_39_cast_fp16 = add(x = var_1917_cast_fp16, y = var_1924_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor k_padded_3_pad_0 = const()[name = string("k_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_3_mode_0 = const()[name = string("k_padded_3_mode_0"), val = string("constant")]; + fp16 const_17_to_fp16 = const()[name = string("const_17_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_3_cast_fp16 = pad(constant_val = const_17_to_fp16, mode = k_padded_3_mode_0, pad = k_padded_3_pad_0, x = input_39_cast_fp16)[name = string("k_padded_3_cast_fp16")]; + tensor v_padded_3_pad_0 = const()[name = string("v_padded_3_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_3_mode_0 = const()[name = string("v_padded_3_mode_0"), val = string("constant")]; + fp16 const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_3_cast_fp16 = pad(constant_val = const_18_to_fp16, mode = v_padded_3_mode_0, pad = v_padded_3_pad_0, x = input_41_cast_fp16)[name = string("v_padded_3_cast_fp16")]; + tensor var_1953_begin_0 = const()[name = string("op_1953_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1953_end_0 = const()[name = string("op_1953_end_0"), val = tensor([1, 2, 512, 512])]; + tensor 
var_1953_end_mask_0 = const()[name = string("op_1953_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1953_cast_fp16 = slice_by_index(begin = var_1953_begin_0, end = var_1953_end_0, end_mask = var_1953_end_mask_0, x = K_sliding_slot_3_cast_fp16)[name = string("op_1953_cast_fp16")]; + int32 var_1960 = const()[name = string("op_1960"), val = int32(2)]; + bool K_sliding_out_3_interleave_0 = const()[name = string("K_sliding_out_3_interleave_0"), val = bool(false)]; + tensor K_sliding_out_3_cast_fp16 = concat(axis = var_1960, interleave = K_sliding_out_3_interleave_0, values = (var_1953_cast_fp16, k_padded_3_cast_fp16))[name = string("K_sliding_out_3_cast_fp16")]; + tensor var_1976_begin_0 = const()[name = string("op_1976_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_1976_end_0 = const()[name = string("op_1976_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_1976_end_mask_0 = const()[name = string("op_1976_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1976_cast_fp16 = slice_by_index(begin = var_1976_begin_0, end = var_1976_end_0, end_mask = var_1976_end_mask_0, x = V_sliding_slot_3_cast_fp16)[name = string("op_1976_cast_fp16")]; + int32 var_1983 = const()[name = string("op_1983"), val = int32(2)]; + bool V_sliding_out_3_interleave_0 = const()[name = string("V_sliding_out_3_interleave_0"), val = bool(false)]; + tensor V_sliding_out_3_cast_fp16 = concat(axis = var_1983, interleave = V_sliding_out_3_interleave_0, values = (var_1976_cast_fp16, v_padded_3_cast_fp16))[name = string("V_sliding_out_3_cast_fp16")]; + tensor K_for_attn_3_begin_0 = const()[name = string("K_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_3_end_0 = const()[name = string("K_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_3_end_mask_0 = const()[name = string("K_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_3_cast_fp16 = slice_by_index(begin = 
K_for_attn_3_begin_0, end = K_for_attn_3_end_0, end_mask = K_for_attn_3_end_mask_0, x = K_sliding_out_3_cast_fp16)[name = string("K_for_attn_3_cast_fp16")]; + tensor V_for_attn_3_begin_0 = const()[name = string("V_for_attn_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_3_end_0 = const()[name = string("V_for_attn_3_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_3_end_mask_0 = const()[name = string("V_for_attn_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_3_cast_fp16 = slice_by_index(begin = V_for_attn_3_begin_0, end = V_for_attn_3_end_0, end_mask = V_for_attn_3_end_mask_0, x = V_sliding_out_3_cast_fp16)[name = string("V_for_attn_3_cast_fp16")]; + tensor transpose_4_perm_0 = const()[name = string("transpose_4_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_2_reps_0 = const()[name = string("tile_2_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = K_for_attn_3_cast_fp16)[name = string("transpose_283")]; + tensor tile_2_cast_fp16 = tile(reps = tile_2_reps_0, x = transpose_4_cast_fp16)[name = string("tile_2_cast_fp16")]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_4, x = tile_2_cast_fp16)[name = string("reshape_4_cast_fp16")]; + tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = reshape_4_cast_fp16)[name = string("transpose_282")]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_5, x = transpose_5_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_6_perm_0 = const()[name = string("transpose_6_perm_0"), 
val = tensor([1, 0, 2, 3])]; + tensor tile_3_reps_0 = const()[name = string("tile_3_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = V_for_attn_3_cast_fp16)[name = string("transpose_281")]; + tensor tile_3_cast_fp16 = tile(reps = tile_3_reps_0, x = transpose_6_cast_fp16)[name = string("tile_3_cast_fp16")]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_6, x = tile_3_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor transpose_7_perm_0 = const()[name = string("transpose_7_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = reshape_6_cast_fp16)[name = string("transpose_280")]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_7, x = transpose_7_cast_fp16)[name = string("reshape_7_cast_fp16")]; + tensor V_expanded_3_perm_0 = const()[name = string("V_expanded_3_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor transpose_85_cast_fp16 = transpose(perm = transpose_85_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_279")]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_15_cast_fp16, y = transpose_85_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_27_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = 
string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_27_cast_fp16)[name = string("reduce_max_1")]; + tensor var_2024 = sub(x = x_27_cast_fp16, y = reduce_max_1)[name = string("op_2024")]; + tensor var_2030 = exp(x = var_2024)[name = string("op_2030")]; + tensor var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor([-1])]; + bool var_2040_keep_dims_0 = const()[name = string("op_2040_keep_dims_0"), val = bool(true)]; + tensor var_2040 = reduce_sum(axes = var_2040_axes_0, keep_dims = var_2040_keep_dims_0, x = var_2030)[name = string("op_2040")]; + tensor var_2046_cast_fp16 = real_div(x = var_2030, y = var_2040)[name = string("op_2046_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor V_expanded_3_cast_fp16 = transpose(perm = V_expanded_3_perm_0, x = reshape_7_cast_fp16)[name = string("transpose_278")]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_2046_cast_fp16, y = V_expanded_3_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_2057 = const()[name = string("op_2057"), val = tensor([0, 2, 1, 3])]; + tensor var_2064 = const()[name = string("op_2064"), val = tensor([1, 1, -1])]; + tensor var_2058_cast_fp16 = transpose(perm = var_2057, x = attn_output_7_cast_fp16)[name = string("transpose_277")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_2064, x = var_2058_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_2069 = const()[name = string("op_2069"), val = tensor([0, 2, 1])]; + string var_2085_pad_type_0 = const()[name = string("op_2085_pad_type_0"), val = string("valid")]; + int32 var_2085_groups_0 = const()[name = 
string("op_2085_groups_0"), val = int32(1)]; + tensor var_2085_strides_0 = const()[name = string("op_2085_strides_0"), val = tensor([1])]; + tensor var_2085_pad_0 = const()[name = string("op_2085_pad_0"), val = tensor([0, 0])]; + tensor var_2085_dilations_0 = const()[name = string("op_2085_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917572224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920193728))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2070_cast_fp16 = transpose(perm = var_2069, x = attn_output_9_cast_fp16)[name = string("transpose_276")]; + tensor var_2085_cast_fp16 = conv(dilations = var_2085_dilations_0, groups = var_2085_groups_0, pad = var_2085_pad_0, pad_type = var_2085_pad_type_0, strides = var_2085_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2070_cast_fp16)[name = string("op_2085_cast_fp16")]; + tensor var_2089 = const()[name = string("op_2089"), val = tensor([0, 2, 1])]; + int32 var_2095 = const()[name = string("op_2095"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_31_cast_fp16 = transpose(perm = var_2089, x = var_2085_cast_fp16)[name = string("transpose_275")]; + tensor var_2097_cast_fp16 = mul(x = x_31_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_2097_cast_fp16")]; + bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; + tensor input_45_cast_fp16 = concat(axis = var_2095, interleave = input_45_interleave_0, values = (x_31_cast_fp16, var_2097_cast_fp16))[name = string("input_45_cast_fp16")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_2092_to_fp16 = const()[name = 
string("op_2092_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2092_to_fp16, x = input_45_cast_fp16)[name = string("normed_41_cast_fp16")]; + tensor var_2102_split_sizes_0 = const()[name = string("op_2102_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2102_axis_0 = const()[name = string("op_2102_axis_0"), val = int32(-1)]; + tensor var_2102_cast_fp16_0, tensor var_2102_cast_fp16_1 = split(axis = var_2102_axis_0, split_sizes = var_2102_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_2102_cast_fp16")]; + tensor layers_c2_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920196352)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_2102_cast_fp16_0, y = layers_c2_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_19_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_33_cast_fp16")]; + int32 var_2111 = const()[name = string("op_2111"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2113_cast_fp16 = mul(x = x_33_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_2113_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_2111, interleave = input_47_interleave_0, values = (x_33_cast_fp16, var_2113_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_2108_to_fp16 = const()[name = string("op_2108_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = 
var_2108_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_2118_split_sizes_0 = const()[name = string("op_2118_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2118_axis_0 = const()[name = string("op_2118_axis_0"), val = int32(-1)]; + tensor var_2118_cast_fp16_0, tensor var_2118_cast_fp16_1 = split(axis = var_2118_axis_0, split_sizes = var_2118_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_2118_cast_fp16")]; + tensor layers_c2_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920201536)))]; + tensor h_9_cast_fp16 = mul(x = var_2118_cast_fp16_0, y = layers_c2_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_2129 = const()[name = string("op_2129"), val = tensor([0, 2, 1])]; + tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; + tensor var_2130 = transpose(perm = var_2129, x = h_9_cast_fp16)[name = string("transpose_274")]; + tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2130)[name = string("input_49")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_c2_1_mlp_gate_proj_weight_palettized, x = input_49)[name = string("gate_5")]; + string up_3_pad_type_0 = 
const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_c2_1_mlp_up_proj_weight_palettized, x = input_49)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_51 = mul(x = gate_7, y = up_3)[name = string("input_51")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_c2_1_mlp_down_proj_weight_palettized, x = input_51)[name = string("mlp_out_3")]; + tensor var_2170_axes_0 = const()[name = string("op_2170_axes_0"), val = tensor([2])]; + tensor var_2170 = squeeze(axes = var_2170_axes_0, x = mlp_out_3)[name = string("op_2170")]; + tensor var_2174 = const()[name = string("op_2174"), val = tensor([0, 2, 1])]; + int32 var_2180 = const()[name = string("op_2180"), 
val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_35 = transpose(perm = var_2174, x = var_2170)[name = string("transpose_273")]; + tensor var_2182 = mul(x = x_35, y = const_21_promoted)[name = string("op_2182")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_2180, interleave = input_53_interleave_0, values = (x_35, var_2182))[name = string("input_53")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_2177_to_fp16 = const()[name = string("op_2177_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_2177_to_fp16, x = input_53)[name = string("normed_49_cast_fp16")]; + tensor var_2187_split_sizes_0 = const()[name = string("op_2187_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2187_axis_0 = const()[name = string("op_2187_axis_0"), val = int32(-1)]; + tensor var_2187_0, tensor var_2187_1 = split(axis = var_2187_axis_0, split_sizes = var_2187_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_2187")]; + tensor hidden_states_13 = mul(x = var_2187_0, y = layers_c2_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_33_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 3328])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 3584])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x 
= per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_2215 = const()[name = string("op_2215"), val = tensor([0, 2, 1])]; + tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; + tensor var_2216 = transpose(perm = var_2215, x = hidden_states_15_cast_fp16)[name = string("transpose_272")]; + tensor input_55 = expand_dims(axes = input_55_axes_0, x = var_2216)[name = string("input_55")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_c2_1_per_layer_input_gate_weight_palettized, x = input_55)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_2235 = const()[name = string("op_2235"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_2236_cast_fp16 = transpose(perm = var_2235, x = per_layer_slice_3_cast_fp16)[name = string("transpose_271")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_2236_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_57_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_57_cast_fp16")]; + 
string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_c2_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920206720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920534464))))[name = string("layers_c2_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_c2_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_2252_axes_0 = const()[name = string("op_2252_axes_0"), val = tensor([2])]; + tensor var_2252_cast_fp16 = squeeze(axes = var_2252_axes_0, x = gated_11_cast_fp16)[name = string("op_2252_cast_fp16")]; + tensor var_2256 = const()[name = string("op_2256"), val = tensor([0, 2, 1])]; + int32 var_2262 = const()[name = string("op_2262"), val = int32(-1)]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_37_cast_fp16 = transpose(perm = var_2256, x = var_2252_cast_fp16)[name = string("transpose_270")]; + tensor var_2264_cast_fp16 = mul(x = x_37_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_2264_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = 
bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_2262, interleave = input_59_interleave_0, values = (x_37_cast_fp16, var_2264_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_2259_to_fp16 = const()[name = string("op_2259_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_2259_to_fp16, x = input_59_cast_fp16)[name = string("normed_53_cast_fp16")]; + tensor var_2269_split_sizes_0 = const()[name = string("op_2269_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2269_axis_0 = const()[name = string("op_2269_axis_0"), val = int32(-1)]; + tensor var_2269_cast_fp16_0, tensor var_2269_cast_fp16_1 = split(axis = var_2269_axis_0, split_sizes = var_2269_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_2269_cast_fp16")]; + tensor layers_c2_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920537088)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_2269_cast_fp16_0, y = layers_c2_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_39_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_39_cast_fp16")]; + tensor var_2281_axes_0 = const()[name = string("op_2281_axes_0"), val = tensor([0])]; + tensor var_2281_cast_fp16 = squeeze(axes = var_2281_axes_0, x = K_sliding_out_3_cast_fp16)[name = string("op_2281_cast_fp16")]; + tensor 
var_2283_axes_0 = const()[name = string("op_2283_axes_0"), val = tensor([0])]; + tensor var_2283_cast_fp16 = squeeze(axes = var_2283_axes_0, x = V_sliding_out_3_cast_fp16)[name = string("op_2283_cast_fp16")]; + tensor var_2286_begin_0 = const()[name = string("op_2286_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2286_end_0 = const()[name = string("op_2286_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2286_end_mask_0 = const()[name = string("op_2286_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2286_squeeze_mask_0 = const()[name = string("op_2286_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2286_cast_fp16 = slice_by_index(begin = var_2286_begin_0, end = var_2286_end_0, end_mask = var_2286_end_mask_0, squeeze_mask = var_2286_squeeze_mask_0, x = K_sliding_in)[name = string("op_2286_cast_fp16")]; + tensor K_sliding_slot_5_axes_0 = const()[name = string("K_sliding_slot_5_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_5_cast_fp16 = expand_dims(axes = K_sliding_slot_5_axes_0, x = var_2286_cast_fp16)[name = string("K_sliding_slot_5_cast_fp16")]; + tensor var_2291_begin_0 = const()[name = string("op_2291_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2291_end_0 = const()[name = string("op_2291_end_0"), val = tensor([3, 2, 512, 512])]; + tensor var_2291_end_mask_0 = const()[name = string("op_2291_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2291_squeeze_mask_0 = const()[name = string("op_2291_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2291_cast_fp16 = slice_by_index(begin = var_2291_begin_0, end = var_2291_end_0, end_mask = var_2291_end_mask_0, squeeze_mask = var_2291_squeeze_mask_0, x = V_sliding_in)[name = string("op_2291_cast_fp16")]; + tensor V_sliding_slot_5_axes_0 = const()[name = string("V_sliding_slot_5_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_5_cast_fp16 = expand_dims(axes = V_sliding_slot_5_axes_0, x = 
var_2291_cast_fp16)[name = string("V_sliding_slot_5_cast_fp16")]; + int32 var_2298 = const()[name = string("op_2298"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2300_cast_fp16 = mul(x = x_39_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2300_cast_fp16")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61_cast_fp16 = concat(axis = var_2298, interleave = input_61_interleave_0, values = (x_39_cast_fp16, var_2300_cast_fp16))[name = string("input_61_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_2295_to_fp16 = const()[name = string("op_2295_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_2295_to_fp16, x = input_61_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_2305_split_sizes_0 = const()[name = string("op_2305_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2305_axis_0 = const()[name = string("op_2305_axis_0"), val = int32(-1)]; + tensor var_2305_cast_fp16_0, tensor var_2305_cast_fp16_1 = split(axis = var_2305_axis_0, split_sizes = var_2305_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_2305_cast_fp16")]; + tensor layers_c2_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920542272)))]; + tensor h_13_cast_fp16 = mul(x = var_2305_cast_fp16_0, y = layers_c2_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_2311 = const()[name = string("op_2311"), val = tensor([0, 2, 1])]; + tensor var_2314_axes_0 = const()[name = string("op_2314_axes_0"), val = tensor([2])]; + tensor var_2312_cast_fp16 = transpose(perm = var_2311, x = 
h_13_cast_fp16)[name = string("transpose_269")]; + tensor var_2314_cast_fp16 = expand_dims(axes = var_2314_axes_0, x = var_2312_cast_fp16)[name = string("op_2314_cast_fp16")]; + string var_2330_pad_type_0 = const()[name = string("op_2330_pad_type_0"), val = string("valid")]; + tensor var_2330_strides_0 = const()[name = string("op_2330_strides_0"), val = tensor([1, 1])]; + tensor var_2330_pad_0 = const()[name = string("op_2330_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2330_dilations_0 = const()[name = string("op_2330_dilations_0"), val = tensor([1, 1])]; + int32 var_2330_groups_0 = const()[name = string("op_2330_groups_0"), val = int32(1)]; + tensor var_2330 = conv(dilations = var_2330_dilations_0, groups = var_2330_groups_0, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2330_strides_0, weight = layers_c2_2_self_attn_q_proj_weight_palettized, x = var_2314_cast_fp16)[name = string("op_2330")]; + tensor var_2335 = const()[name = string("op_2335"), val = tensor([1, 8, 256, 1])]; + tensor var_2336 = reshape(shape = var_2335, x = var_2330)[name = string("op_2336")]; + tensor var_2341 = const()[name = string("op_2341"), val = tensor([0, 1, 3, 2])]; + tensor var_2351 = const()[name = string("op_2351"), val = tensor([1, 8, 256])]; + tensor var_2342 = transpose(perm = var_2341, x = var_2336)[name = string("transpose_268")]; + tensor x_41 = reshape(shape = var_2351, x = var_2342)[name = string("x_41")]; + int32 var_2357 = const()[name = string("op_2357"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_2359 = mul(x = x_41, y = const_25_promoted)[name = string("op_2359")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_2357, interleave = input_65_interleave_0, values = (x_41, var_2359))[name = string("input_65")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), 
val = tensor([-1])]; + fp16 var_2354_to_fp16 = const()[name = string("op_2354_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_2354_to_fp16, x = input_65)[name = string("normed_61_cast_fp16")]; + tensor var_2364_split_sizes_0 = const()[name = string("op_2364_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2364_axis_0 = const()[name = string("op_2364_axis_0"), val = int32(-1)]; + tensor var_2364_0, tensor var_2364_1 = split(axis = var_2364_axis_0, split_sizes = var_2364_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_2364")]; + tensor var_2366 = mul(x = var_2364_0, y = layers_c2_2_self_attn_q_norm_weight)[name = string("op_2366")]; + tensor var_2371 = const()[name = string("op_2371"), val = tensor([1, 8, 1, 256])]; + tensor q_19 = reshape(shape = var_2371, x = var_2366)[name = string("q_19")]; + tensor var_2373_cast_fp16 = mul(x = q_19, y = cos_s)[name = string("op_2373_cast_fp16")]; + tensor var_2374_split_sizes_0 = const()[name = string("op_2374_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2374_axis_0 = const()[name = string("op_2374_axis_0"), val = int32(-1)]; + tensor var_2374_0, tensor var_2374_1 = split(axis = var_2374_axis_0, split_sizes = var_2374_split_sizes_0, x = q_19)[name = string("op_2374")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_2376 = mul(x = var_2374_1, y = const_26_promoted)[name = string("op_2376")]; + int32 var_2378 = const()[name = string("op_2378"), val = int32(-1)]; + bool var_2379_interleave_0 = const()[name = string("op_2379_interleave_0"), val = bool(false)]; + tensor var_2379 = concat(axis = var_2378, interleave = var_2379_interleave_0, values = (var_2376, var_2374_0))[name = string("op_2379")]; + tensor var_2380_cast_fp16 = mul(x = var_2379, y = sin_s)[name = string("op_2380_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_2373_cast_fp16, y = var_2380_cast_fp16)[name = 
string("q_23_cast_fp16")]; + string var_2393_pad_type_0 = const()[name = string("op_2393_pad_type_0"), val = string("valid")]; + tensor var_2393_strides_0 = const()[name = string("op_2393_strides_0"), val = tensor([1, 1])]; + tensor var_2393_pad_0 = const()[name = string("op_2393_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2393_dilations_0 = const()[name = string("op_2393_dilations_0"), val = tensor([1, 1])]; + int32 var_2393_groups_0 = const()[name = string("op_2393_groups_0"), val = int32(1)]; + tensor var_2393 = conv(dilations = var_2393_dilations_0, groups = var_2393_groups_0, pad = var_2393_pad_0, pad_type = var_2393_pad_type_0, strides = var_2393_strides_0, weight = layers_c2_2_self_attn_k_proj_weight_palettized, x = var_2314_cast_fp16)[name = string("op_2393")]; + tensor var_2398 = const()[name = string("op_2398"), val = tensor([1, 2, 256, 1])]; + tensor var_2399 = reshape(shape = var_2398, x = var_2393)[name = string("op_2399")]; + tensor var_2404 = const()[name = string("op_2404"), val = tensor([0, 1, 3, 2])]; + string var_2421_pad_type_0 = const()[name = string("op_2421_pad_type_0"), val = string("valid")]; + tensor var_2421_strides_0 = const()[name = string("op_2421_strides_0"), val = tensor([1, 1])]; + tensor var_2421_pad_0 = const()[name = string("op_2421_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2421_dilations_0 = const()[name = string("op_2421_dilations_0"), val = tensor([1, 1])]; + int32 var_2421_groups_0 = const()[name = string("op_2421_groups_0"), val = int32(1)]; + tensor var_2421 = conv(dilations = var_2421_dilations_0, groups = var_2421_groups_0, pad = var_2421_pad_0, pad_type = var_2421_pad_type_0, strides = var_2421_strides_0, weight = layers_c2_2_self_attn_v_proj_weight_palettized, x = var_2314_cast_fp16)[name = string("op_2421")]; + tensor var_2426 = const()[name = string("op_2426"), val = tensor([1, 2, 256, 1])]; + tensor var_2427 = reshape(shape = var_2426, x = var_2421)[name = string("op_2427")]; + tensor var_2432 = 
const()[name = string("op_2432"), val = tensor([0, 1, 3, 2])]; + tensor var_2442 = const()[name = string("op_2442"), val = tensor([1, 2, 256])]; + tensor var_2405 = transpose(perm = var_2404, x = var_2399)[name = string("transpose_267")]; + tensor x_43 = reshape(shape = var_2442, x = var_2405)[name = string("x_43")]; + int32 var_2448 = const()[name = string("op_2448"), val = int32(-1)]; + fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; + tensor var_2450 = mul(x = x_43, y = const_27_promoted)[name = string("op_2450")]; + bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; + tensor input_67 = concat(axis = var_2448, interleave = input_67_interleave_0, values = (x_43, var_2450))[name = string("input_67")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_2445_to_fp16, x = input_67)[name = string("normed_65_cast_fp16")]; + tensor var_2455_split_sizes_0 = const()[name = string("op_2455_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2455_axis_0 = const()[name = string("op_2455_axis_0"), val = int32(-1)]; + tensor var_2455_0, tensor var_2455_1 = split(axis = var_2455_axis_0, split_sizes = var_2455_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_2455")]; + tensor var_2457 = mul(x = var_2455_0, y = layers_c2_2_self_attn_k_norm_weight)[name = string("op_2457")]; + tensor var_2462 = const()[name = string("op_2462"), val = tensor([1, 2, 1, 256])]; + tensor q_21 = reshape(shape = var_2462, x = var_2457)[name = string("q_21")]; + fp16 var_2464_promoted = const()[name = string("op_2464_promoted"), val = fp16(0x1p+1)]; + tensor var_2433 = transpose(perm = var_2432, x = var_2427)[name = string("transpose_266")]; + tensor var_2465 = pow(x = var_2433, y = 
var_2464_promoted)[name = string("op_2465")]; + tensor var_2470_axes_0 = const()[name = string("op_2470_axes_0"), val = tensor([-1])]; + bool var_2470_keep_dims_0 = const()[name = string("op_2470_keep_dims_0"), val = bool(true)]; + tensor var_2470 = reduce_mean(axes = var_2470_axes_0, keep_dims = var_2470_keep_dims_0, x = var_2465)[name = string("op_2470")]; + fp16 var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_5_cast_fp16 = add(x = var_2470, y = var_2472_to_fp16)[name = string("mean_sq_5_cast_fp16")]; + fp32 var_2474_epsilon_0 = const()[name = string("op_2474_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2474_cast_fp16 = rsqrt(epsilon = var_2474_epsilon_0, x = mean_sq_5_cast_fp16)[name = string("op_2474_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_2433, y = var_2474_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_2476_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_2476_cast_fp16")]; + tensor var_2477_split_sizes_0 = const()[name = string("op_2477_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2477_axis_0 = const()[name = string("op_2477_axis_0"), val = int32(-1)]; + tensor var_2477_0, tensor var_2477_1 = split(axis = var_2477_axis_0, split_sizes = var_2477_split_sizes_0, x = q_21)[name = string("op_2477")]; + fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; + tensor var_2479 = mul(x = var_2477_1, y = const_28_promoted)[name = string("op_2479")]; + int32 var_2481 = const()[name = string("op_2481"), val = int32(-1)]; + bool var_2482_interleave_0 = const()[name = string("op_2482_interleave_0"), val = bool(false)]; + tensor var_2482 = concat(axis = var_2481, interleave = var_2482_interleave_0, values = (var_2479, var_2477_0))[name = string("op_2482")]; + tensor var_2483_cast_fp16 = mul(x = var_2482, y = sin_s)[name = string("op_2483_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = var_2476_cast_fp16, y = 
var_2483_cast_fp16)[name = string("input_69_cast_fp16")]; + tensor k_padded_5_pad_0 = const()[name = string("k_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_5_mode_0 = const()[name = string("k_padded_5_mode_0"), val = string("constant")]; + fp16 const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_5_cast_fp16 = pad(constant_val = const_29_to_fp16, mode = k_padded_5_mode_0, pad = k_padded_5_pad_0, x = input_69_cast_fp16)[name = string("k_padded_5_cast_fp16")]; + tensor v_padded_5_pad_0 = const()[name = string("v_padded_5_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_5_mode_0 = const()[name = string("v_padded_5_mode_0"), val = string("constant")]; + fp16 const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_5_cast_fp16 = pad(constant_val = const_30_to_fp16, mode = v_padded_5_mode_0, pad = v_padded_5_pad_0, x = input_71_cast_fp16)[name = string("v_padded_5_cast_fp16")]; + tensor var_2512_begin_0 = const()[name = string("op_2512_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_2512_end_0 = const()[name = string("op_2512_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2512_end_mask_0 = const()[name = string("op_2512_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2512_cast_fp16 = slice_by_index(begin = var_2512_begin_0, end = var_2512_end_0, end_mask = var_2512_end_mask_0, x = K_sliding_slot_5_cast_fp16)[name = string("op_2512_cast_fp16")]; + int32 var_2519 = const()[name = string("op_2519"), val = int32(2)]; + bool K_sliding_out_5_interleave_0 = const()[name = string("K_sliding_out_5_interleave_0"), val = bool(false)]; + tensor K_sliding_out_5_cast_fp16 = concat(axis = var_2519, interleave = K_sliding_out_5_interleave_0, values = (var_2512_cast_fp16, k_padded_5_cast_fp16))[name = string("K_sliding_out_5_cast_fp16")]; + tensor var_2535_begin_0 = const()[name = string("op_2535_begin_0"), val 
= tensor([0, 0, 1, 0])]; + tensor var_2535_end_0 = const()[name = string("op_2535_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_2535_end_mask_0 = const()[name = string("op_2535_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2535_cast_fp16 = slice_by_index(begin = var_2535_begin_0, end = var_2535_end_0, end_mask = var_2535_end_mask_0, x = V_sliding_slot_5_cast_fp16)[name = string("op_2535_cast_fp16")]; + int32 var_2542 = const()[name = string("op_2542"), val = int32(2)]; + bool V_sliding_out_5_interleave_0 = const()[name = string("V_sliding_out_5_interleave_0"), val = bool(false)]; + tensor V_sliding_out_5_cast_fp16 = concat(axis = var_2542, interleave = V_sliding_out_5_interleave_0, values = (var_2535_cast_fp16, v_padded_5_cast_fp16))[name = string("V_sliding_out_5_cast_fp16")]; + tensor K_for_attn_5_begin_0 = const()[name = string("K_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_5_end_0 = const()[name = string("K_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_5_end_mask_0 = const()[name = string("K_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_5_cast_fp16 = slice_by_index(begin = K_for_attn_5_begin_0, end = K_for_attn_5_end_0, end_mask = K_for_attn_5_end_mask_0, x = K_sliding_out_5_cast_fp16)[name = string("K_for_attn_5_cast_fp16")]; + tensor V_for_attn_5_begin_0 = const()[name = string("V_for_attn_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_5_end_0 = const()[name = string("V_for_attn_5_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_5_end_mask_0 = const()[name = string("V_for_attn_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_5_cast_fp16 = slice_by_index(begin = V_for_attn_5_begin_0, end = V_for_attn_5_end_0, end_mask = V_for_attn_5_end_mask_0, x = V_sliding_out_5_cast_fp16)[name = string("V_for_attn_5_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = 
string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = K_for_attn_5_cast_fp16)[name = string("transpose_265")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_264")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = V_for_attn_5_cast_fp16)[name = string("transpose_263")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), 
val = tensor([-1, 1, 512, 256])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_262")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_86_cast_fp16 = transpose(perm = transpose_86_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_261")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_23_cast_fp16, y = transpose_86_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_47_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_47_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_47_cast_fp16)[name = string("reduce_max_2")]; + tensor var_2583 = sub(x = x_47_cast_fp16, y = reduce_max_2)[name = string("op_2583")]; + tensor var_2589 = exp(x = var_2583)[name = string("op_2589")]; + tensor var_2599_axes_0 = const()[name = string("op_2599_axes_0"), val = tensor([-1])]; + bool var_2599_keep_dims_0 = const()[name = string("op_2599_keep_dims_0"), val = bool(true)]; + tensor var_2599 = reduce_sum(axes = var_2599_axes_0, keep_dims = var_2599_keep_dims_0, x = var_2589)[name = string("op_2599")]; + tensor var_2605_cast_fp16 = real_div(x = var_2589, y = var_2599)[name = 
string("op_2605_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_260")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_2605_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_2616 = const()[name = string("op_2616"), val = tensor([0, 2, 1, 3])]; + tensor var_2623 = const()[name = string("op_2623"), val = tensor([1, 1, -1])]; + tensor var_2617_cast_fp16 = transpose(perm = var_2616, x = attn_output_13_cast_fp16)[name = string("transpose_259")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2623, x = var_2617_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2628 = const()[name = string("op_2628"), val = tensor([0, 2, 1])]; + string var_2644_pad_type_0 = const()[name = string("op_2644_pad_type_0"), val = string("valid")]; + int32 var_2644_groups_0 = const()[name = string("op_2644_groups_0"), val = int32(1)]; + tensor var_2644_strides_0 = const()[name = string("op_2644_strides_0"), val = tensor([1])]; + tensor var_2644_pad_0 = const()[name = string("op_2644_pad_0"), val = tensor([0, 0])]; + tensor var_2644_dilations_0 = const()[name = string("op_2644_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(920547456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923168960))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2629_cast_fp16 = transpose(perm = var_2628, x = 
attn_output_15_cast_fp16)[name = string("transpose_258")]; + tensor var_2644_cast_fp16 = conv(dilations = var_2644_dilations_0, groups = var_2644_groups_0, pad = var_2644_pad_0, pad_type = var_2644_pad_type_0, strides = var_2644_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2629_cast_fp16)[name = string("op_2644_cast_fp16")]; + tensor var_2648 = const()[name = string("op_2648"), val = tensor([0, 2, 1])]; + int32 var_2654 = const()[name = string("op_2654"), val = int32(-1)]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_51_cast_fp16 = transpose(perm = var_2648, x = var_2644_cast_fp16)[name = string("transpose_257")]; + tensor var_2656_cast_fp16 = mul(x = x_51_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2656_cast_fp16")]; + bool input_75_interleave_0 = const()[name = string("input_75_interleave_0"), val = bool(false)]; + tensor input_75_cast_fp16 = concat(axis = var_2654, interleave = input_75_interleave_0, values = (x_51_cast_fp16, var_2656_cast_fp16))[name = string("input_75_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_2651_to_fp16, x = input_75_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_2661_split_sizes_0 = const()[name = string("op_2661_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2661_axis_0 = const()[name = string("op_2661_axis_0"), val = int32(-1)]; + tensor var_2661_cast_fp16_0, tensor var_2661_cast_fp16_1 = split(axis = var_2661_axis_0, split_sizes = var_2661_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_2661_cast_fp16")]; + tensor layers_c2_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_c2_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923171584)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_2661_cast_fp16_0, y = layers_c2_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_53_cast_fp16 = add(x = x_39_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_53_cast_fp16")]; + int32 var_2670 = const()[name = string("op_2670"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2672_cast_fp16 = mul(x = x_53_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2672_cast_fp16")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77_cast_fp16 = concat(axis = var_2670, interleave = input_77_interleave_0, values = (x_53_cast_fp16, var_2672_cast_fp16))[name = string("input_77_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_2667_to_fp16 = const()[name = string("op_2667_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_2667_to_fp16, x = input_77_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_2677_split_sizes_0 = const()[name = string("op_2677_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2677_axis_0 = const()[name = string("op_2677_axis_0"), val = int32(-1)]; + tensor var_2677_cast_fp16_0, tensor var_2677_cast_fp16_1 = split(axis = var_2677_axis_0, split_sizes = var_2677_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_2677_cast_fp16")]; + tensor layers_c2_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(923176768)))]; + tensor h_15_cast_fp16 = mul(x = var_2677_cast_fp16_0, y = layers_c2_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_2688 = const()[name = string("op_2688"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_2689 = transpose(perm = var_2688, x = h_15_cast_fp16)[name = string("transpose_256")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_2689)[name = string("input_79")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_c2_2_mlp_gate_proj_weight_palettized, x = input_79)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_c2_2_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_5")]; 
+ string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_81 = mul(x = gate_11, y = up_5)[name = string("input_81")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_c2_2_mlp_down_proj_weight_palettized, x = input_81)[name = string("mlp_out_5")]; + tensor var_2729_axes_0 = const()[name = string("op_2729_axes_0"), val = tensor([2])]; + tensor var_2729 = squeeze(axes = var_2729_axes_0, x = mlp_out_5)[name = string("op_2729")]; + tensor var_2733 = const()[name = string("op_2733"), val = tensor([0, 2, 1])]; + int32 var_2739 = const()[name = string("op_2739"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor x_55 = transpose(perm = var_2733, x = var_2729)[name = string("transpose_255")]; + tensor var_2741 = mul(x = x_55, y = const_33_promoted)[name = string("op_2741")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83 = concat(axis = var_2739, interleave = input_83_interleave_0, values = (x_55, var_2741))[name = string("input_83")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_2736_to_fp16 = const()[name 
= string("op_2736_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_2736_to_fp16, x = input_83)[name = string("normed_77_cast_fp16")]; + tensor var_2746_split_sizes_0 = const()[name = string("op_2746_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2746_axis_0 = const()[name = string("op_2746_axis_0"), val = int32(-1)]; + tensor var_2746_0, tensor var_2746_1 = split(axis = var_2746_axis_0, split_sizes = var_2746_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_2746")]; + tensor hidden_states_23 = mul(x = var_2746_0, y = layers_c2_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_53_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 3584])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 3840])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_2774 = const()[name = string("op_2774"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_2775 = transpose(perm = var_2774, x = hidden_states_25_cast_fp16)[name = string("transpose_254")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_2775)[name = string("input_85")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + 
tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_c2_2_per_layer_input_gate_weight_palettized, x = input_85)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_2794 = const()[name = string("op_2794"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_2795_cast_fp16 = transpose(perm = var_2794, x = per_layer_slice_5_cast_fp16)[name = string("transpose_253")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_2795_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_87_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_c2_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(923181952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923509696))))[name = string("layers_c2_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_c2_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_87_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_2811_axes_0 = const()[name = string("op_2811_axes_0"), val = tensor([2])]; + tensor var_2811_cast_fp16 = squeeze(axes = var_2811_axes_0, x = gated_17_cast_fp16)[name = string("op_2811_cast_fp16")]; + tensor var_2815 = const()[name = string("op_2815"), val = tensor([0, 2, 1])]; + int32 var_2821 = const()[name = string("op_2821"), val = int32(-1)]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_57_cast_fp16 = transpose(perm = var_2815, x = var_2811_cast_fp16)[name = string("transpose_252")]; + tensor var_2823_cast_fp16 = mul(x = x_57_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2823_cast_fp16")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89_cast_fp16 = concat(axis = var_2821, interleave = input_89_interleave_0, values = (x_57_cast_fp16, var_2823_cast_fp16))[name = string("input_89_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_2818_to_fp16 = const()[name = string("op_2818_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_2818_to_fp16, x = input_89_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_2828_split_sizes_0 = const()[name = string("op_2828_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_2828_axis_0 = const()[name = string("op_2828_axis_0"), val = int32(-1)]; + tensor var_2828_cast_fp16_0, tensor var_2828_cast_fp16_1 = split(axis = var_2828_axis_0, split_sizes = var_2828_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_2828_cast_fp16")]; + tensor layers_c2_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923512320)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_2828_cast_fp16_0, y = layers_c2_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = tensor([0x1.58p-1])]; + tensor x_59_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_35_promoted_to_fp16)[name = string("x_59_cast_fp16")]; + tensor var_2840_axes_0 = const()[name = string("op_2840_axes_0"), val = tensor([0])]; + tensor var_2840_cast_fp16 = squeeze(axes = var_2840_axes_0, x = K_sliding_out_5_cast_fp16)[name = string("op_2840_cast_fp16")]; + tensor var_2842_axes_0 = const()[name = string("op_2842_axes_0"), val = tensor([0])]; + tensor var_2842_cast_fp16 = squeeze(axes = var_2842_axes_0, x = V_sliding_out_5_cast_fp16)[name = string("op_2842_cast_fp16")]; + tensor var_2845_begin_0 = const()[name = string("op_2845_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2845_end_0 = const()[name = string("op_2845_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2845_end_mask_0 = const()[name = string("op_2845_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2845_squeeze_mask_0 = const()[name = string("op_2845_squeeze_mask_0"), val = tensor([true, 
false, false, false])]; + tensor var_2845_cast_fp16 = slice_by_index(begin = var_2845_begin_0, end = var_2845_end_0, end_mask = var_2845_end_mask_0, squeeze_mask = var_2845_squeeze_mask_0, x = K_sliding_in)[name = string("op_2845_cast_fp16")]; + tensor K_sliding_slot_7_axes_0 = const()[name = string("K_sliding_slot_7_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_7_cast_fp16 = expand_dims(axes = K_sliding_slot_7_axes_0, x = var_2845_cast_fp16)[name = string("K_sliding_slot_7_cast_fp16")]; + tensor var_2850_begin_0 = const()[name = string("op_2850_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_2850_end_0 = const()[name = string("op_2850_end_0"), val = tensor([4, 2, 512, 512])]; + tensor var_2850_end_mask_0 = const()[name = string("op_2850_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2850_squeeze_mask_0 = const()[name = string("op_2850_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_2850_cast_fp16 = slice_by_index(begin = var_2850_begin_0, end = var_2850_end_0, end_mask = var_2850_end_mask_0, squeeze_mask = var_2850_squeeze_mask_0, x = V_sliding_in)[name = string("op_2850_cast_fp16")]; + tensor V_sliding_slot_7_axes_0 = const()[name = string("V_sliding_slot_7_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_7_cast_fp16 = expand_dims(axes = V_sliding_slot_7_axes_0, x = var_2850_cast_fp16)[name = string("V_sliding_slot_7_cast_fp16")]; + int32 var_2857 = const()[name = string("op_2857"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2859_cast_fp16 = mul(x = x_59_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2859_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_2857, interleave = input_91_interleave_0, values = (x_59_cast_fp16, var_2859_cast_fp16))[name = string("input_91_cast_fp16")]; + 
tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_2854_to_fp16 = const()[name = string("op_2854_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_2854_to_fp16, x = input_91_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_2864_split_sizes_0 = const()[name = string("op_2864_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2864_axis_0 = const()[name = string("op_2864_axis_0"), val = int32(-1)]; + tensor var_2864_cast_fp16_0, tensor var_2864_cast_fp16_1 = split(axis = var_2864_axis_0, split_sizes = var_2864_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_2864_cast_fp16")]; + tensor layers_c2_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923517504)))]; + tensor h_19_cast_fp16 = mul(x = var_2864_cast_fp16_0, y = layers_c2_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_2870 = const()[name = string("op_2870"), val = tensor([0, 2, 1])]; + tensor var_2873_axes_0 = const()[name = string("op_2873_axes_0"), val = tensor([2])]; + tensor var_2871_cast_fp16 = transpose(perm = var_2870, x = h_19_cast_fp16)[name = string("transpose_251")]; + tensor var_2873_cast_fp16 = expand_dims(axes = var_2873_axes_0, x = var_2871_cast_fp16)[name = string("op_2873_cast_fp16")]; + string var_2889_pad_type_0 = const()[name = string("op_2889_pad_type_0"), val = string("valid")]; + tensor var_2889_strides_0 = const()[name = string("op_2889_strides_0"), val = tensor([1, 1])]; + tensor var_2889_pad_0 = const()[name = string("op_2889_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2889_dilations_0 = const()[name = string("op_2889_dilations_0"), val = tensor([1, 1])]; + int32 var_2889_groups_0 = const()[name = string("op_2889_groups_0"), val = 
int32(1)]; + tensor var_2889 = conv(dilations = var_2889_dilations_0, groups = var_2889_groups_0, pad = var_2889_pad_0, pad_type = var_2889_pad_type_0, strides = var_2889_strides_0, weight = layers_c2_3_self_attn_q_proj_weight_palettized, x = var_2873_cast_fp16)[name = string("op_2889")]; + tensor var_2894 = const()[name = string("op_2894"), val = tensor([1, 8, 256, 1])]; + tensor var_2895 = reshape(shape = var_2894, x = var_2889)[name = string("op_2895")]; + tensor var_2900 = const()[name = string("op_2900"), val = tensor([0, 1, 3, 2])]; + tensor var_2910 = const()[name = string("op_2910"), val = tensor([1, 8, 256])]; + tensor var_2901 = transpose(perm = var_2900, x = var_2895)[name = string("transpose_250")]; + tensor x_61 = reshape(shape = var_2910, x = var_2901)[name = string("x_61")]; + int32 var_2916 = const()[name = string("op_2916"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor var_2918 = mul(x = x_61, y = const_37_promoted)[name = string("op_2918")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95 = concat(axis = var_2916, interleave = input_95_interleave_0, values = (x_61, var_2918))[name = string("input_95")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_2913_to_fp16 = const()[name = string("op_2913_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_2913_to_fp16, x = input_95)[name = string("normed_89_cast_fp16")]; + tensor var_2923_split_sizes_0 = const()[name = string("op_2923_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2923_axis_0 = const()[name = string("op_2923_axis_0"), val = int32(-1)]; + tensor var_2923_0, tensor var_2923_1 = split(axis = var_2923_axis_0, split_sizes = var_2923_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_2923")]; + tensor var_2925 = mul(x = 
var_2923_0, y = layers_c2_3_self_attn_q_norm_weight)[name = string("op_2925")]; + tensor var_2930 = const()[name = string("op_2930"), val = tensor([1, 8, 1, 256])]; + tensor q_27 = reshape(shape = var_2930, x = var_2925)[name = string("q_27")]; + tensor var_2932_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2932_cast_fp16")]; + tensor var_2933_split_sizes_0 = const()[name = string("op_2933_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2933_axis_0 = const()[name = string("op_2933_axis_0"), val = int32(-1)]; + tensor var_2933_0, tensor var_2933_1 = split(axis = var_2933_axis_0, split_sizes = var_2933_split_sizes_0, x = q_27)[name = string("op_2933")]; + fp16 const_38_promoted = const()[name = string("const_38_promoted"), val = fp16(-0x1p+0)]; + tensor var_2935 = mul(x = var_2933_1, y = const_38_promoted)[name = string("op_2935")]; + int32 var_2937 = const()[name = string("op_2937"), val = int32(-1)]; + bool var_2938_interleave_0 = const()[name = string("op_2938_interleave_0"), val = bool(false)]; + tensor var_2938 = concat(axis = var_2937, interleave = var_2938_interleave_0, values = (var_2935, var_2933_0))[name = string("op_2938")]; + tensor var_2939_cast_fp16 = mul(x = var_2938, y = sin_s)[name = string("op_2939_cast_fp16")]; + tensor q_31_cast_fp16 = add(x = var_2932_cast_fp16, y = var_2939_cast_fp16)[name = string("q_31_cast_fp16")]; + string var_2952_pad_type_0 = const()[name = string("op_2952_pad_type_0"), val = string("valid")]; + tensor var_2952_strides_0 = const()[name = string("op_2952_strides_0"), val = tensor([1, 1])]; + tensor var_2952_pad_0 = const()[name = string("op_2952_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2952_dilations_0 = const()[name = string("op_2952_dilations_0"), val = tensor([1, 1])]; + int32 var_2952_groups_0 = const()[name = string("op_2952_groups_0"), val = int32(1)]; + tensor var_2952 = conv(dilations = var_2952_dilations_0, groups = var_2952_groups_0, pad = var_2952_pad_0, pad_type = var_2952_pad_type_0, 
strides = var_2952_strides_0, weight = layers_c2_3_self_attn_k_proj_weight_palettized, x = var_2873_cast_fp16)[name = string("op_2952")]; + tensor var_2957 = const()[name = string("op_2957"), val = tensor([1, 2, 256, 1])]; + tensor var_2958 = reshape(shape = var_2957, x = var_2952)[name = string("op_2958")]; + tensor var_2963 = const()[name = string("op_2963"), val = tensor([0, 1, 3, 2])]; + string var_2980_pad_type_0 = const()[name = string("op_2980_pad_type_0"), val = string("valid")]; + tensor var_2980_strides_0 = const()[name = string("op_2980_strides_0"), val = tensor([1, 1])]; + tensor var_2980_pad_0 = const()[name = string("op_2980_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2980_dilations_0 = const()[name = string("op_2980_dilations_0"), val = tensor([1, 1])]; + int32 var_2980_groups_0 = const()[name = string("op_2980_groups_0"), val = int32(1)]; + tensor var_2980 = conv(dilations = var_2980_dilations_0, groups = var_2980_groups_0, pad = var_2980_pad_0, pad_type = var_2980_pad_type_0, strides = var_2980_strides_0, weight = layers_c2_3_self_attn_v_proj_weight_palettized, x = var_2873_cast_fp16)[name = string("op_2980")]; + tensor var_2985 = const()[name = string("op_2985"), val = tensor([1, 2, 256, 1])]; + tensor var_2986 = reshape(shape = var_2985, x = var_2980)[name = string("op_2986")]; + tensor var_2991 = const()[name = string("op_2991"), val = tensor([0, 1, 3, 2])]; + tensor var_3001 = const()[name = string("op_3001"), val = tensor([1, 2, 256])]; + tensor var_2964 = transpose(perm = var_2963, x = var_2958)[name = string("transpose_249")]; + tensor x_63 = reshape(shape = var_3001, x = var_2964)[name = string("x_63")]; + int32 var_3007 = const()[name = string("op_3007"), val = int32(-1)]; + fp16 const_39_promoted = const()[name = string("const_39_promoted"), val = fp16(-0x1p+0)]; + tensor var_3009 = mul(x = x_63, y = const_39_promoted)[name = string("op_3009")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = 
bool(false)]; + tensor input_97 = concat(axis = var_3007, interleave = input_97_interleave_0, values = (x_63, var_3009))[name = string("input_97")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_3004_to_fp16 = const()[name = string("op_3004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_3004_to_fp16, x = input_97)[name = string("normed_93_cast_fp16")]; + tensor var_3014_split_sizes_0 = const()[name = string("op_3014_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3014_axis_0 = const()[name = string("op_3014_axis_0"), val = int32(-1)]; + tensor var_3014_0, tensor var_3014_1 = split(axis = var_3014_axis_0, split_sizes = var_3014_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_3014")]; + tensor var_3016 = mul(x = var_3014_0, y = layers_c2_3_self_attn_k_norm_weight)[name = string("op_3016")]; + tensor var_3021 = const()[name = string("op_3021"), val = tensor([1, 2, 1, 256])]; + tensor q_29 = reshape(shape = var_3021, x = var_3016)[name = string("q_29")]; + fp16 var_3023_promoted = const()[name = string("op_3023_promoted"), val = fp16(0x1p+1)]; + tensor var_2992 = transpose(perm = var_2991, x = var_2986)[name = string("transpose_248")]; + tensor var_3024 = pow(x = var_2992, y = var_3023_promoted)[name = string("op_3024")]; + tensor var_3029_axes_0 = const()[name = string("op_3029_axes_0"), val = tensor([-1])]; + bool var_3029_keep_dims_0 = const()[name = string("op_3029_keep_dims_0"), val = bool(true)]; + tensor var_3029 = reduce_mean(axes = var_3029_axes_0, keep_dims = var_3029_keep_dims_0, x = var_3024)[name = string("op_3029")]; + fp16 var_3031_to_fp16 = const()[name = string("op_3031_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_7_cast_fp16 = add(x = var_3029, y = var_3031_to_fp16)[name = string("mean_sq_7_cast_fp16")]; + fp32 var_3033_epsilon_0 = const()[name = string("op_3033_epsilon_0"), val = fp32(0x1.197998p-40)]; 
+ tensor var_3033_cast_fp16 = rsqrt(epsilon = var_3033_epsilon_0, x = mean_sq_7_cast_fp16)[name = string("op_3033_cast_fp16")]; + tensor input_101_cast_fp16 = mul(x = var_2992, y = var_3033_cast_fp16)[name = string("input_101_cast_fp16")]; + tensor var_3035_cast_fp16 = mul(x = q_29, y = cos_s)[name = string("op_3035_cast_fp16")]; + tensor var_3036_split_sizes_0 = const()[name = string("op_3036_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3036_axis_0 = const()[name = string("op_3036_axis_0"), val = int32(-1)]; + tensor var_3036_0, tensor var_3036_1 = split(axis = var_3036_axis_0, split_sizes = var_3036_split_sizes_0, x = q_29)[name = string("op_3036")]; + fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)]; + tensor var_3038 = mul(x = var_3036_1, y = const_40_promoted)[name = string("op_3038")]; + int32 var_3040 = const()[name = string("op_3040"), val = int32(-1)]; + bool var_3041_interleave_0 = const()[name = string("op_3041_interleave_0"), val = bool(false)]; + tensor var_3041 = concat(axis = var_3040, interleave = var_3041_interleave_0, values = (var_3038, var_3036_0))[name = string("op_3041")]; + tensor var_3042_cast_fp16 = mul(x = var_3041, y = sin_s)[name = string("op_3042_cast_fp16")]; + tensor input_99_cast_fp16 = add(x = var_3035_cast_fp16, y = var_3042_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor k_padded_7_pad_0 = const()[name = string("k_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_7_mode_0 = const()[name = string("k_padded_7_mode_0"), val = string("constant")]; + fp16 const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_7_cast_fp16 = pad(constant_val = const_41_to_fp16, mode = k_padded_7_mode_0, pad = k_padded_7_pad_0, x = input_99_cast_fp16)[name = string("k_padded_7_cast_fp16")]; + tensor v_padded_7_pad_0 = const()[name = string("v_padded_7_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string 
v_padded_7_mode_0 = const()[name = string("v_padded_7_mode_0"), val = string("constant")]; + fp16 const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_7_cast_fp16 = pad(constant_val = const_42_to_fp16, mode = v_padded_7_mode_0, pad = v_padded_7_pad_0, x = input_101_cast_fp16)[name = string("v_padded_7_cast_fp16")]; + tensor var_3071_begin_0 = const()[name = string("op_3071_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3071_end_0 = const()[name = string("op_3071_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3071_end_mask_0 = const()[name = string("op_3071_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3071_cast_fp16 = slice_by_index(begin = var_3071_begin_0, end = var_3071_end_0, end_mask = var_3071_end_mask_0, x = K_sliding_slot_7_cast_fp16)[name = string("op_3071_cast_fp16")]; + int32 var_3078 = const()[name = string("op_3078"), val = int32(2)]; + bool K_sliding_out_7_interleave_0 = const()[name = string("K_sliding_out_7_interleave_0"), val = bool(false)]; + tensor K_sliding_out_7_cast_fp16 = concat(axis = var_3078, interleave = K_sliding_out_7_interleave_0, values = (var_3071_cast_fp16, k_padded_7_cast_fp16))[name = string("K_sliding_out_7_cast_fp16")]; + tensor var_3094_begin_0 = const()[name = string("op_3094_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3094_end_0 = const()[name = string("op_3094_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3094_end_mask_0 = const()[name = string("op_3094_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3094_cast_fp16 = slice_by_index(begin = var_3094_begin_0, end = var_3094_end_0, end_mask = var_3094_end_mask_0, x = V_sliding_slot_7_cast_fp16)[name = string("op_3094_cast_fp16")]; + int32 var_3101 = const()[name = string("op_3101"), val = int32(2)]; + bool V_sliding_out_7_interleave_0 = const()[name = string("V_sliding_out_7_interleave_0"), val = bool(false)]; + tensor V_sliding_out_7_cast_fp16 = concat(axis 
= var_3101, interleave = V_sliding_out_7_interleave_0, values = (var_3094_cast_fp16, v_padded_7_cast_fp16))[name = string("V_sliding_out_7_cast_fp16")]; + tensor K_for_attn_7_begin_0 = const()[name = string("K_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_7_end_0 = const()[name = string("K_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_7_end_mask_0 = const()[name = string("K_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_7_cast_fp16 = slice_by_index(begin = K_for_attn_7_begin_0, end = K_for_attn_7_end_0, end_mask = K_for_attn_7_end_mask_0, x = K_sliding_out_7_cast_fp16)[name = string("K_for_attn_7_cast_fp16")]; + tensor V_for_attn_7_begin_0 = const()[name = string("V_for_attn_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_7_end_0 = const()[name = string("V_for_attn_7_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_7_end_mask_0 = const()[name = string("V_for_attn_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_7_cast_fp16 = slice_by_index(begin = V_for_attn_7_begin_0, end = V_for_attn_7_end_0, end_mask = V_for_attn_7_end_mask_0, x = V_sliding_out_7_cast_fp16)[name = string("V_for_attn_7_cast_fp16")]; + tensor transpose_12_perm_0 = const()[name = string("transpose_12_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_6_reps_0 = const()[name = string("tile_6_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = K_for_attn_7_cast_fp16)[name = string("transpose_247")]; + tensor tile_6_cast_fp16 = tile(reps = tile_6_reps_0, x = transpose_12_cast_fp16)[name = string("tile_6_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_12, x = tile_6_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor transpose_13_perm_0 = const()[name = 
string("transpose_13_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = reshape_12_cast_fp16)[name = string("transpose_246")]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_13, x = transpose_13_cast_fp16)[name = string("reshape_13_cast_fp16")]; + tensor transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_14_perm_0 = const()[name = string("transpose_14_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_7_reps_0 = const()[name = string("tile_7_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = V_for_attn_7_cast_fp16)[name = string("transpose_245")]; + tensor tile_7_cast_fp16 = tile(reps = tile_7_reps_0, x = transpose_14_cast_fp16)[name = string("tile_7_cast_fp16")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_14, x = tile_7_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor transpose_15_perm_0 = const()[name = string("transpose_15_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_244")]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_15, x = transpose_15_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor V_expanded_7_perm_0 = const()[name = string("V_expanded_7_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor 
transpose_87_cast_fp16 = transpose(perm = transpose_87_perm_0, x = reshape_13_cast_fp16)[name = string("transpose_243")]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_31_cast_fp16, y = transpose_87_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_3")]; + tensor var_3142 = sub(x = x_67_cast_fp16, y = reduce_max_3)[name = string("op_3142")]; + tensor var_3148 = exp(x = var_3142)[name = string("op_3148")]; + tensor var_3158_axes_0 = const()[name = string("op_3158_axes_0"), val = tensor([-1])]; + bool var_3158_keep_dims_0 = const()[name = string("op_3158_keep_dims_0"), val = bool(true)]; + tensor var_3158 = reduce_sum(axes = var_3158_axes_0, keep_dims = var_3158_keep_dims_0, x = var_3148)[name = string("op_3158")]; + tensor var_3164_cast_fp16 = real_div(x = var_3148, y = var_3158)[name = string("op_3164_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor V_expanded_7_cast_fp16 = transpose(perm = V_expanded_7_perm_0, x = reshape_15_cast_fp16)[name = string("transpose_242")]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_3164_cast_fp16, y = V_expanded_7_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_3175 = 
const()[name = string("op_3175"), val = tensor([0, 2, 1, 3])]; + tensor var_3182 = const()[name = string("op_3182"), val = tensor([1, 1, -1])]; + tensor var_3176_cast_fp16 = transpose(perm = var_3175, x = attn_output_19_cast_fp16)[name = string("transpose_241")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_3182, x = var_3176_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_3187 = const()[name = string("op_3187"), val = tensor([0, 2, 1])]; + string var_3203_pad_type_0 = const()[name = string("op_3203_pad_type_0"), val = string("valid")]; + int32 var_3203_groups_0 = const()[name = string("op_3203_groups_0"), val = int32(1)]; + tensor var_3203_strides_0 = const()[name = string("op_3203_strides_0"), val = tensor([1])]; + tensor var_3203_pad_0 = const()[name = string("op_3203_pad_0"), val = tensor([0, 0])]; + tensor var_3203_dilations_0 = const()[name = string("op_3203_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(923522688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926144192))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3188_cast_fp16 = transpose(perm = var_3187, x = attn_output_21_cast_fp16)[name = string("transpose_240")]; + tensor var_3203_cast_fp16 = conv(dilations = var_3203_dilations_0, groups = var_3203_groups_0, pad = var_3203_pad_0, pad_type = var_3203_pad_type_0, strides = var_3203_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3188_cast_fp16)[name = string("op_3203_cast_fp16")]; + tensor var_3207 = const()[name = string("op_3207"), val = tensor([0, 2, 1])]; + int32 var_3213 = const()[name = string("op_3213"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 
= transpose(perm = var_3207, x = var_3203_cast_fp16)[name = string("transpose_239")]; + tensor var_3215_cast_fp16 = mul(x = x_71_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3215_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_3213, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_3215_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_3210_to_fp16 = const()[name = string("op_3210_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_3210_to_fp16, x = input_105_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_3220_split_sizes_0 = const()[name = string("op_3220_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3220_axis_0 = const()[name = string("op_3220_axis_0"), val = int32(-1)]; + tensor var_3220_cast_fp16_0, tensor var_3220_cast_fp16_1 = split(axis = var_3220_axis_0, split_sizes = var_3220_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_3220_cast_fp16")]; + tensor layers_c2_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926146816)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_3220_cast_fp16_0, y = layers_c2_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_59_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_3229 = const()[name = string("op_3229"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3231_cast_fp16 = mul(x = 
x_73_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3231_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_3229, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_3231_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_3226_to_fp16 = const()[name = string("op_3226_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_3226_to_fp16, x = input_107_cast_fp16)[name = string("normed_101_cast_fp16")]; + tensor var_3236_split_sizes_0 = const()[name = string("op_3236_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3236_axis_0 = const()[name = string("op_3236_axis_0"), val = int32(-1)]; + tensor var_3236_cast_fp16_0, tensor var_3236_cast_fp16_1 = split(axis = var_3236_axis_0, split_sizes = var_3236_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_3236_cast_fp16")]; + tensor layers_c2_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926152000)))]; + tensor h_21_cast_fp16 = mul(x = var_3236_cast_fp16_0, y = layers_c2_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_3247 = const()[name = string("op_3247"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_3248 = transpose(perm = var_3247, x = h_21_cast_fp16)[name = string("transpose_238")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_3248)[name = string("input_109")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor 
gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_c2_3_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_c2_3_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_111 = mul(x = gate_15, y = up_7)[name = string("input_111")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 
mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_c2_3_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_7")]; + tensor var_3288_axes_0 = const()[name = string("op_3288_axes_0"), val = tensor([2])]; + tensor var_3288 = squeeze(axes = var_3288_axes_0, x = mlp_out_7)[name = string("op_3288")]; + tensor var_3292 = const()[name = string("op_3292"), val = tensor([0, 2, 1])]; + int32 var_3298 = const()[name = string("op_3298"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_3292, x = var_3288)[name = string("transpose_237")]; + tensor var_3300 = mul(x = x_75, y = const_45_promoted)[name = string("op_3300")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_3298, interleave = input_113_interleave_0, values = (x_75, var_3300))[name = string("input_113")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_3295_to_fp16 = const()[name = string("op_3295_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_3295_to_fp16, x = input_113)[name = string("normed_105_cast_fp16")]; + tensor var_3305_split_sizes_0 = const()[name = string("op_3305_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3305_axis_0 = const()[name = string("op_3305_axis_0"), val = int32(-1)]; + tensor var_3305_0, tensor var_3305_1 = split(axis = var_3305_axis_0, split_sizes = var_3305_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_3305")]; + tensor hidden_states_33 = mul(x = var_3305_0, y = layers_c2_3_post_feedforward_layernorm_weight)[name = 
string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 3840])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 4096])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_3333 = const()[name = string("op_3333"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_3334 = transpose(perm = var_3333, x = hidden_states_35_cast_fp16)[name = string("transpose_236")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_3334)[name = string("input_115")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_c2_3_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_3353 = const()[name = string("op_3353"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_3354_cast_fp16 = transpose(perm = var_3353, x = per_layer_slice_7_cast_fp16)[name = string("transpose_235")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_3354_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_c2_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926157184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926484928))))[name = string("layers_c2_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_c2_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_3370_axes_0 = const()[name = 
string("op_3370_axes_0"), val = tensor([2])]; + tensor var_3370_cast_fp16 = squeeze(axes = var_3370_axes_0, x = gated_23_cast_fp16)[name = string("op_3370_cast_fp16")]; + tensor var_3374 = const()[name = string("op_3374"), val = tensor([0, 2, 1])]; + int32 var_3380 = const()[name = string("op_3380"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_3374, x = var_3370_cast_fp16)[name = string("transpose_234")]; + tensor var_3382_cast_fp16 = mul(x = x_77_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_3382_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_3380, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_3382_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_3377_to_fp16 = const()[name = string("op_3377_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_3377_to_fp16, x = input_119_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-1)]; + tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_3387_cast_fp16")]; + tensor layers_c2_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926487552)))]; + tensor hidden_states_39_cast_fp16 = mul(x = 
var_3387_cast_fp16_0, y = layers_c2_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.14p-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + tensor var_3399_axes_0 = const()[name = string("op_3399_axes_0"), val = tensor([0])]; + tensor var_3399_cast_fp16 = squeeze(axes = var_3399_axes_0, x = K_sliding_out_7_cast_fp16)[name = string("op_3399_cast_fp16")]; + tensor var_3401_axes_0 = const()[name = string("op_3401_axes_0"), val = tensor([0])]; + tensor var_3401_cast_fp16 = squeeze(axes = var_3401_axes_0, x = V_sliding_out_7_cast_fp16)[name = string("op_3401_cast_fp16")]; + tensor var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3404_end_0 = const()[name = string("op_3404_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3404_squeeze_mask_0 = const()[name = string("op_3404_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = var_3404_end_0, end_mask = var_3404_end_mask_0, squeeze_mask = var_3404_squeeze_mask_0, x = K_sliding_in)[name = string("op_3404_cast_fp16")]; + tensor K_sliding_slot_9_axes_0 = const()[name = string("K_sliding_slot_9_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_9_cast_fp16 = expand_dims(axes = K_sliding_slot_9_axes_0, x = var_3404_cast_fp16)[name = string("K_sliding_slot_9_cast_fp16")]; + tensor var_3409_begin_0 = const()[name = string("op_3409_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor 
var_3409_end_0 = const()[name = string("op_3409_end_0"), val = tensor([5, 2, 512, 512])]; + tensor var_3409_end_mask_0 = const()[name = string("op_3409_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3409_squeeze_mask_0 = const()[name = string("op_3409_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3409_cast_fp16 = slice_by_index(begin = var_3409_begin_0, end = var_3409_end_0, end_mask = var_3409_end_mask_0, squeeze_mask = var_3409_squeeze_mask_0, x = V_sliding_in)[name = string("op_3409_cast_fp16")]; + tensor V_sliding_slot_9_axes_0 = const()[name = string("V_sliding_slot_9_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_9_cast_fp16 = expand_dims(axes = V_sliding_slot_9_axes_0, x = var_3409_cast_fp16)[name = string("V_sliding_slot_9_cast_fp16")]; + int32 var_3416 = const()[name = string("op_3416"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3418_cast_fp16 = mul(x = x_79_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_3418_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_3416, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_3418_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_3413_to_fp16 = const()[name = string("op_3413_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_3413_to_fp16, x = input_121_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor var_3423_split_sizes_0 = const()[name = string("op_3423_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3423_axis_0 = const()[name = string("op_3423_axis_0"), val = int32(-1)]; + tensor var_3423_cast_fp16_0, tensor var_3423_cast_fp16_1 = 
split(axis = var_3423_axis_0, split_sizes = var_3423_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_3423_cast_fp16")]; + tensor layers_c2_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926492736)))]; + tensor h_25_cast_fp16 = mul(x = var_3423_cast_fp16_0, y = layers_c2_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_3429 = const()[name = string("op_3429"), val = tensor([0, 2, 1])]; + tensor var_3432_axes_0 = const()[name = string("op_3432_axes_0"), val = tensor([2])]; + tensor var_3430_cast_fp16 = transpose(perm = var_3429, x = h_25_cast_fp16)[name = string("transpose_233")]; + tensor var_3432_cast_fp16 = expand_dims(axes = var_3432_axes_0, x = var_3430_cast_fp16)[name = string("op_3432_cast_fp16")]; + string var_3448_pad_type_0 = const()[name = string("op_3448_pad_type_0"), val = string("valid")]; + tensor var_3448_strides_0 = const()[name = string("op_3448_strides_0"), val = tensor([1, 1])]; + tensor var_3448_pad_0 = const()[name = string("op_3448_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3448_dilations_0 = const()[name = string("op_3448_dilations_0"), val = tensor([1, 1])]; + int32 var_3448_groups_0 = const()[name = string("op_3448_groups_0"), val = int32(1)]; + tensor var_3448 = conv(dilations = var_3448_dilations_0, groups = var_3448_groups_0, pad = var_3448_pad_0, pad_type = var_3448_pad_type_0, strides = var_3448_strides_0, weight = layers_c2_4_self_attn_q_proj_weight_palettized, x = var_3432_cast_fp16)[name = string("op_3448")]; + tensor var_3453 = const()[name = string("op_3453"), val = tensor([1, 8, 256, 1])]; + tensor var_3454 = reshape(shape = var_3453, x = var_3448)[name = string("op_3454")]; + tensor var_3459 = const()[name = string("op_3459"), val = tensor([0, 1, 3, 2])]; + tensor var_3469 = const()[name = string("op_3469"), val 
= tensor([1, 8, 256])]; + tensor var_3460 = transpose(perm = var_3459, x = var_3454)[name = string("transpose_232")]; + tensor x_81 = reshape(shape = var_3469, x = var_3460)[name = string("x_81")]; + int32 var_3475 = const()[name = string("op_3475"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_3477 = mul(x = x_81, y = const_49_promoted)[name = string("op_3477")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_3475, interleave = input_125_interleave_0, values = (x_81, var_3477))[name = string("input_125")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_3472_to_fp16 = const()[name = string("op_3472_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_3472_to_fp16, x = input_125)[name = string("normed_117_cast_fp16")]; + tensor var_3482_split_sizes_0 = const()[name = string("op_3482_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3482_axis_0 = const()[name = string("op_3482_axis_0"), val = int32(-1)]; + tensor var_3482_0, tensor var_3482_1 = split(axis = var_3482_axis_0, split_sizes = var_3482_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_3482")]; + tensor var_3484 = mul(x = var_3482_0, y = layers_c2_4_self_attn_q_norm_weight)[name = string("op_3484")]; + tensor var_3489 = const()[name = string("op_3489"), val = tensor([1, 8, 1, 256])]; + tensor q_35 = reshape(shape = var_3489, x = var_3484)[name = string("q_35")]; + tensor var_3491_cast_fp16 = mul(x = q_35, y = cos_s)[name = string("op_3491_cast_fp16")]; + tensor var_3492_split_sizes_0 = const()[name = string("op_3492_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3492_axis_0 = const()[name = string("op_3492_axis_0"), val = int32(-1)]; + tensor var_3492_0, tensor var_3492_1 = split(axis = 
var_3492_axis_0, split_sizes = var_3492_split_sizes_0, x = q_35)[name = string("op_3492")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_3494 = mul(x = var_3492_1, y = const_50_promoted)[name = string("op_3494")]; + int32 var_3496 = const()[name = string("op_3496"), val = int32(-1)]; + bool var_3497_interleave_0 = const()[name = string("op_3497_interleave_0"), val = bool(false)]; + tensor var_3497 = concat(axis = var_3496, interleave = var_3497_interleave_0, values = (var_3494, var_3492_0))[name = string("op_3497")]; + tensor var_3498_cast_fp16 = mul(x = var_3497, y = sin_s)[name = string("op_3498_cast_fp16")]; + tensor q_39_cast_fp16 = add(x = var_3491_cast_fp16, y = var_3498_cast_fp16)[name = string("q_39_cast_fp16")]; + string var_3511_pad_type_0 = const()[name = string("op_3511_pad_type_0"), val = string("valid")]; + tensor var_3511_strides_0 = const()[name = string("op_3511_strides_0"), val = tensor([1, 1])]; + tensor var_3511_pad_0 = const()[name = string("op_3511_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3511_dilations_0 = const()[name = string("op_3511_dilations_0"), val = tensor([1, 1])]; + int32 var_3511_groups_0 = const()[name = string("op_3511_groups_0"), val = int32(1)]; + tensor var_3511 = conv(dilations = var_3511_dilations_0, groups = var_3511_groups_0, pad = var_3511_pad_0, pad_type = var_3511_pad_type_0, strides = var_3511_strides_0, weight = layers_c2_4_self_attn_k_proj_weight_palettized, x = var_3432_cast_fp16)[name = string("op_3511")]; + tensor var_3516 = const()[name = string("op_3516"), val = tensor([1, 2, 256, 1])]; + tensor var_3517 = reshape(shape = var_3516, x = var_3511)[name = string("op_3517")]; + tensor var_3522 = const()[name = string("op_3522"), val = tensor([0, 1, 3, 2])]; + string var_3539_pad_type_0 = const()[name = string("op_3539_pad_type_0"), val = string("valid")]; + tensor var_3539_strides_0 = const()[name = string("op_3539_strides_0"), val = tensor([1, 
1])]; + tensor var_3539_pad_0 = const()[name = string("op_3539_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3539_dilations_0 = const()[name = string("op_3539_dilations_0"), val = tensor([1, 1])]; + int32 var_3539_groups_0 = const()[name = string("op_3539_groups_0"), val = int32(1)]; + tensor var_3539 = conv(dilations = var_3539_dilations_0, groups = var_3539_groups_0, pad = var_3539_pad_0, pad_type = var_3539_pad_type_0, strides = var_3539_strides_0, weight = layers_c2_4_self_attn_v_proj_weight_palettized, x = var_3432_cast_fp16)[name = string("op_3539")]; + tensor var_3544 = const()[name = string("op_3544"), val = tensor([1, 2, 256, 1])]; + tensor var_3545 = reshape(shape = var_3544, x = var_3539)[name = string("op_3545")]; + tensor var_3550 = const()[name = string("op_3550"), val = tensor([0, 1, 3, 2])]; + tensor var_3560 = const()[name = string("op_3560"), val = tensor([1, 2, 256])]; + tensor var_3523 = transpose(perm = var_3522, x = var_3517)[name = string("transpose_231")]; + tensor x_83 = reshape(shape = var_3560, x = var_3523)[name = string("x_83")]; + int32 var_3566 = const()[name = string("op_3566"), val = int32(-1)]; + fp16 const_51_promoted = const()[name = string("const_51_promoted"), val = fp16(-0x1p+0)]; + tensor var_3568 = mul(x = x_83, y = const_51_promoted)[name = string("op_3568")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127 = concat(axis = var_3566, interleave = input_127_interleave_0, values = (x_83, var_3568))[name = string("input_127")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_3563_to_fp16 = const()[name = string("op_3563_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_3563_to_fp16, x = input_127)[name = string("normed_121_cast_fp16")]; + tensor var_3573_split_sizes_0 = const()[name = string("op_3573_split_sizes_0"), val = 
tensor([256, 256])]; + int32 var_3573_axis_0 = const()[name = string("op_3573_axis_0"), val = int32(-1)]; + tensor var_3573_0, tensor var_3573_1 = split(axis = var_3573_axis_0, split_sizes = var_3573_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_3573")]; + tensor var_3575 = mul(x = var_3573_0, y = layers_c2_4_self_attn_k_norm_weight)[name = string("op_3575")]; + tensor var_3580 = const()[name = string("op_3580"), val = tensor([1, 2, 1, 256])]; + tensor q_37 = reshape(shape = var_3580, x = var_3575)[name = string("q_37")]; + fp16 var_3582_promoted = const()[name = string("op_3582_promoted"), val = fp16(0x1p+1)]; + tensor var_3551 = transpose(perm = var_3550, x = var_3545)[name = string("transpose_230")]; + tensor var_3583 = pow(x = var_3551, y = var_3582_promoted)[name = string("op_3583")]; + tensor var_3588_axes_0 = const()[name = string("op_3588_axes_0"), val = tensor([-1])]; + bool var_3588_keep_dims_0 = const()[name = string("op_3588_keep_dims_0"), val = bool(true)]; + tensor var_3588 = reduce_mean(axes = var_3588_axes_0, keep_dims = var_3588_keep_dims_0, x = var_3583)[name = string("op_3588")]; + fp16 var_3590_to_fp16 = const()[name = string("op_3590_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_9_cast_fp16 = add(x = var_3588, y = var_3590_to_fp16)[name = string("mean_sq_9_cast_fp16")]; + fp32 var_3592_epsilon_0 = const()[name = string("op_3592_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3592_cast_fp16 = rsqrt(epsilon = var_3592_epsilon_0, x = mean_sq_9_cast_fp16)[name = string("op_3592_cast_fp16")]; + tensor input_131_cast_fp16 = mul(x = var_3551, y = var_3592_cast_fp16)[name = string("input_131_cast_fp16")]; + tensor var_3594_cast_fp16 = mul(x = q_37, y = cos_s)[name = string("op_3594_cast_fp16")]; + tensor var_3595_split_sizes_0 = const()[name = string("op_3595_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3595_axis_0 = const()[name = string("op_3595_axis_0"), val = int32(-1)]; + tensor var_3595_0, tensor var_3595_1 = 
split(axis = var_3595_axis_0, split_sizes = var_3595_split_sizes_0, x = q_37)[name = string("op_3595")]; + fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; + tensor var_3597 = mul(x = var_3595_1, y = const_52_promoted)[name = string("op_3597")]; + int32 var_3599 = const()[name = string("op_3599"), val = int32(-1)]; + bool var_3600_interleave_0 = const()[name = string("op_3600_interleave_0"), val = bool(false)]; + tensor var_3600 = concat(axis = var_3599, interleave = var_3600_interleave_0, values = (var_3597, var_3595_0))[name = string("op_3600")]; + tensor var_3601_cast_fp16 = mul(x = var_3600, y = sin_s)[name = string("op_3601_cast_fp16")]; + tensor input_129_cast_fp16 = add(x = var_3594_cast_fp16, y = var_3601_cast_fp16)[name = string("input_129_cast_fp16")]; + tensor k_padded_9_pad_0 = const()[name = string("k_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_9_mode_0 = const()[name = string("k_padded_9_mode_0"), val = string("constant")]; + fp16 const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_9_cast_fp16 = pad(constant_val = const_53_to_fp16, mode = k_padded_9_mode_0, pad = k_padded_9_pad_0, x = input_129_cast_fp16)[name = string("k_padded_9_cast_fp16")]; + tensor v_padded_9_pad_0 = const()[name = string("v_padded_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_9_mode_0 = const()[name = string("v_padded_9_mode_0"), val = string("constant")]; + fp16 const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_9_cast_fp16 = pad(constant_val = const_54_to_fp16, mode = v_padded_9_mode_0, pad = v_padded_9_pad_0, x = input_131_cast_fp16)[name = string("v_padded_9_cast_fp16")]; + tensor var_3630_begin_0 = const()[name = string("op_3630_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3630_end_0 = const()[name = string("op_3630_end_0"), val = tensor([1, 2, 512, 512])]; + tensor 
var_3630_end_mask_0 = const()[name = string("op_3630_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3630_cast_fp16 = slice_by_index(begin = var_3630_begin_0, end = var_3630_end_0, end_mask = var_3630_end_mask_0, x = K_sliding_slot_9_cast_fp16)[name = string("op_3630_cast_fp16")]; + int32 var_3637 = const()[name = string("op_3637"), val = int32(2)]; + bool K_sliding_out_9_interleave_0 = const()[name = string("K_sliding_out_9_interleave_0"), val = bool(false)]; + tensor K_sliding_out_9_cast_fp16 = concat(axis = var_3637, interleave = K_sliding_out_9_interleave_0, values = (var_3630_cast_fp16, k_padded_9_cast_fp16))[name = string("K_sliding_out_9_cast_fp16")]; + tensor var_3653_begin_0 = const()[name = string("op_3653_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_3653_end_0 = const()[name = string("op_3653_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_3653_end_mask_0 = const()[name = string("op_3653_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_3653_cast_fp16 = slice_by_index(begin = var_3653_begin_0, end = var_3653_end_0, end_mask = var_3653_end_mask_0, x = V_sliding_slot_9_cast_fp16)[name = string("op_3653_cast_fp16")]; + int32 var_3660 = const()[name = string("op_3660"), val = int32(2)]; + bool V_sliding_out_9_interleave_0 = const()[name = string("V_sliding_out_9_interleave_0"), val = bool(false)]; + tensor V_sliding_out_9_cast_fp16 = concat(axis = var_3660, interleave = V_sliding_out_9_interleave_0, values = (var_3653_cast_fp16, v_padded_9_cast_fp16))[name = string("V_sliding_out_9_cast_fp16")]; + tensor K_for_attn_9_begin_0 = const()[name = string("K_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_9_end_0 = const()[name = string("K_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_9_end_mask_0 = const()[name = string("K_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_9_cast_fp16 = slice_by_index(begin = 
K_for_attn_9_begin_0, end = K_for_attn_9_end_0, end_mask = K_for_attn_9_end_mask_0, x = K_sliding_out_9_cast_fp16)[name = string("K_for_attn_9_cast_fp16")]; + tensor V_for_attn_9_begin_0 = const()[name = string("V_for_attn_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_9_end_0 = const()[name = string("V_for_attn_9_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_9_end_mask_0 = const()[name = string("V_for_attn_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_9_cast_fp16 = slice_by_index(begin = V_for_attn_9_begin_0, end = V_for_attn_9_end_0, end_mask = V_for_attn_9_end_mask_0, x = V_sliding_out_9_cast_fp16)[name = string("V_for_attn_9_cast_fp16")]; + tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_8_reps_0 = const()[name = string("tile_8_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_16_cast_fp16 = transpose(perm = transpose_16_perm_0, x = K_for_attn_9_cast_fp16)[name = string("transpose_229")]; + tensor tile_8_cast_fp16 = tile(reps = tile_8_reps_0, x = transpose_16_cast_fp16)[name = string("tile_8_cast_fp16")]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_16, x = tile_8_cast_fp16)[name = string("reshape_16_cast_fp16")]; + tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = reshape_16_cast_fp16)[name = string("transpose_228")]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_17, x = transpose_17_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_18_perm_0 = const()[name = 
string("transpose_18_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_9_reps_0 = const()[name = string("tile_9_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_18_cast_fp16 = transpose(perm = transpose_18_perm_0, x = V_for_attn_9_cast_fp16)[name = string("transpose_227")]; + tensor tile_9_cast_fp16 = tile(reps = tile_9_reps_0, x = transpose_18_cast_fp16)[name = string("tile_9_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_18, x = tile_9_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_19_cast_fp16 = transpose(perm = transpose_19_perm_0, x = reshape_18_cast_fp16)[name = string("transpose_226")]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_19, x = transpose_19_cast_fp16)[name = string("reshape_19_cast_fp16")]; + tensor V_expanded_9_perm_0 = const()[name = string("V_expanded_9_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor transpose_88_cast_fp16 = transpose(perm = transpose_88_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_225")]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_39_cast_fp16, y = transpose_88_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_87_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool 
reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_87_cast_fp16)[name = string("reduce_max_4")]; + tensor var_3701 = sub(x = x_87_cast_fp16, y = reduce_max_4)[name = string("op_3701")]; + tensor var_3707 = exp(x = var_3701)[name = string("op_3707")]; + tensor var_3717_axes_0 = const()[name = string("op_3717_axes_0"), val = tensor([-1])]; + bool var_3717_keep_dims_0 = const()[name = string("op_3717_keep_dims_0"), val = bool(true)]; + tensor var_3717 = reduce_sum(axes = var_3717_axes_0, keep_dims = var_3717_keep_dims_0, x = var_3707)[name = string("op_3717")]; + tensor var_3723_cast_fp16 = real_div(x = var_3707, y = var_3717)[name = string("op_3723_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor V_expanded_9_cast_fp16 = transpose(perm = V_expanded_9_perm_0, x = reshape_19_cast_fp16)[name = string("transpose_224")]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_3723_cast_fp16, y = V_expanded_9_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3734 = const()[name = string("op_3734"), val = tensor([0, 2, 1, 3])]; + tensor var_3741 = const()[name = string("op_3741"), val = tensor([1, 1, -1])]; + tensor var_3735_cast_fp16 = transpose(perm = var_3734, x = attn_output_25_cast_fp16)[name = string("transpose_223")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_3741, x = var_3735_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_3746 = const()[name = string("op_3746"), val = tensor([0, 2, 1])]; + string var_3762_pad_type_0 = const()[name = string("op_3762_pad_type_0"), val = string("valid")]; 
+ int32 var_3762_groups_0 = const()[name = string("op_3762_groups_0"), val = int32(1)]; + tensor var_3762_strides_0 = const()[name = string("op_3762_strides_0"), val = tensor([1])]; + tensor var_3762_pad_0 = const()[name = string("op_3762_pad_0"), val = tensor([0, 0])]; + tensor var_3762_dilations_0 = const()[name = string("op_3762_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(926497920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929119424))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3747_cast_fp16 = transpose(perm = var_3746, x = attn_output_27_cast_fp16)[name = string("transpose_222")]; + tensor var_3762_cast_fp16 = conv(dilations = var_3762_dilations_0, groups = var_3762_groups_0, pad = var_3762_pad_0, pad_type = var_3762_pad_type_0, strides = var_3762_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3747_cast_fp16)[name = string("op_3762_cast_fp16")]; + tensor var_3766 = const()[name = string("op_3766"), val = tensor([0, 2, 1])]; + int32 var_3772 = const()[name = string("op_3772"), val = int32(-1)]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_91_cast_fp16 = transpose(perm = var_3766, x = var_3762_cast_fp16)[name = string("transpose_221")]; + tensor var_3774_cast_fp16 = mul(x = x_91_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_3774_cast_fp16")]; + bool input_135_interleave_0 = const()[name = string("input_135_interleave_0"), val = bool(false)]; + tensor input_135_cast_fp16 = concat(axis = var_3772, interleave = input_135_interleave_0, values = (x_91_cast_fp16, var_3774_cast_fp16))[name = string("input_135_cast_fp16")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = 
tensor([-1])]; + fp16 var_3769_to_fp16 = const()[name = string("op_3769_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_3769_to_fp16, x = input_135_cast_fp16)[name = string("normed_125_cast_fp16")]; + tensor var_3779_split_sizes_0 = const()[name = string("op_3779_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3779_axis_0 = const()[name = string("op_3779_axis_0"), val = int32(-1)]; + tensor var_3779_cast_fp16_0, tensor var_3779_cast_fp16_1 = split(axis = var_3779_axis_0, split_sizes = var_3779_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_3779_cast_fp16")]; + tensor layers_c2_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929122048)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_3779_cast_fp16_0, y = layers_c2_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_93_cast_fp16")]; + int32 var_3788 = const()[name = string("op_3788"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3790_cast_fp16 = mul(x = x_93_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3790_cast_fp16")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137_cast_fp16 = concat(axis = var_3788, interleave = input_137_interleave_0, values = (x_93_cast_fp16, var_3790_cast_fp16))[name = string("input_137_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_3785_to_fp16 = const()[name = string("op_3785_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_3785_to_fp16, x = input_137_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_3795_split_sizes_0 = const()[name = string("op_3795_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3795_axis_0 = const()[name = string("op_3795_axis_0"), val = int32(-1)]; + tensor var_3795_cast_fp16_0, tensor var_3795_cast_fp16_1 = split(axis = var_3795_axis_0, split_sizes = var_3795_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_3795_cast_fp16")]; + tensor layers_c2_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929127232)))]; + tensor h_27_cast_fp16 = mul(x = var_3795_cast_fp16_0, y = layers_c2_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_3806 = const()[name = string("op_3806"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_3807 = transpose(perm = var_3806, x = h_27_cast_fp16)[name = string("transpose_220")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_3807)[name = string("input_139")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = 
layers_c2_4_mlp_gate_proj_weight_palettized, x = input_139)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_c2_4_mlp_up_proj_weight_palettized, x = input_139)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_141 = mul(x = gate_19, y = up_9)[name = string("input_141")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_c2_4_mlp_down_proj_weight_palettized, x = input_141)[name = string("mlp_out_9")]; + tensor var_3847_axes_0 = const()[name = string("op_3847_axes_0"), val = tensor([2])]; + tensor var_3847 = squeeze(axes = var_3847_axes_0, x = mlp_out_9)[name = string("op_3847")]; + 
tensor var_3851 = const()[name = string("op_3851"), val = tensor([0, 2, 1])]; + int32 var_3857 = const()[name = string("op_3857"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor x_95 = transpose(perm = var_3851, x = var_3847)[name = string("transpose_219")]; + tensor var_3859 = mul(x = x_95, y = const_57_promoted)[name = string("op_3859")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143 = concat(axis = var_3857, interleave = input_143_interleave_0, values = (x_95, var_3859))[name = string("input_143")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_3854_to_fp16 = const()[name = string("op_3854_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_3854_to_fp16, x = input_143)[name = string("normed_133_cast_fp16")]; + tensor var_3864_split_sizes_0 = const()[name = string("op_3864_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3864_axis_0 = const()[name = string("op_3864_axis_0"), val = int32(-1)]; + tensor var_3864_0, tensor var_3864_1 = split(axis = var_3864_axis_0, split_sizes = var_3864_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_3864")]; + tensor hidden_states_43 = mul(x = var_3864_0, y = layers_c2_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_93_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 4096])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 1, 4352])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor 
per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_3892 = const()[name = string("op_3892"), val = tensor([0, 2, 1])]; + tensor input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor([2])]; + tensor var_3893 = transpose(perm = var_3892, x = hidden_states_45_cast_fp16)[name = string("transpose_218")]; + tensor input_145 = expand_dims(axes = input_145_axes_0, x = var_3893)[name = string("input_145")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_c2_4_per_layer_input_gate_weight_palettized, x = input_145)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_3912 = const()[name = string("op_3912"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_3913_cast_fp16 = transpose(perm = var_3912, x = per_layer_slice_9_cast_fp16)[name = string("transpose_217")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = 
var_3913_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_147_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_147_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_c2_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929132416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929460160))))[name = string("layers_c2_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_c2_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_147_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_3929_axes_0 = const()[name = string("op_3929_axes_0"), val = tensor([2])]; + tensor var_3929_cast_fp16 = squeeze(axes = var_3929_axes_0, x = gated_29_cast_fp16)[name = string("op_3929_cast_fp16")]; + tensor var_3933 = const()[name = string("op_3933"), val = tensor([0, 2, 1])]; + int32 var_3939 = const()[name = string("op_3939"), val = int32(-1)]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_97_cast_fp16 = transpose(perm = var_3933, x = var_3929_cast_fp16)[name = string("transpose_216")]; + tensor 
var_3941_cast_fp16 = mul(x = x_97_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_3941_cast_fp16")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149_cast_fp16 = concat(axis = var_3939, interleave = input_149_interleave_0, values = (x_97_cast_fp16, var_3941_cast_fp16))[name = string("input_149_cast_fp16")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_3936_to_fp16 = const()[name = string("op_3936_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_3936_to_fp16, x = input_149_cast_fp16)[name = string("normed_137_cast_fp16")]; + tensor var_3946_split_sizes_0 = const()[name = string("op_3946_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3946_axis_0 = const()[name = string("op_3946_axis_0"), val = int32(-1)]; + tensor var_3946_cast_fp16_0, tensor var_3946_cast_fp16_1 = split(axis = var_3946_axis_0, split_sizes = var_3946_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_3946_cast_fp16")]; + tensor layers_c2_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929462784)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_3946_cast_fp16_0, y = layers_c2_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = tensor([0x1.46p-1])]; + tensor x_99_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_59_promoted_to_fp16)[name = string("x_99_cast_fp16")]; + tensor var_3958_axes_0 = 
const()[name = string("op_3958_axes_0"), val = tensor([0])]; + tensor var_3958_cast_fp16 = squeeze(axes = var_3958_axes_0, x = K_sliding_out_9_cast_fp16)[name = string("op_3958_cast_fp16")]; + tensor var_3960_axes_0 = const()[name = string("op_3960_axes_0"), val = tensor([0])]; + tensor var_3960_cast_fp16 = squeeze(axes = var_3960_axes_0, x = V_sliding_out_9_cast_fp16)[name = string("op_3960_cast_fp16")]; + tensor var_3963_begin_0 = const()[name = string("op_3963_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3963_end_0 = const()[name = string("op_3963_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_3963_end_mask_0 = const()[name = string("op_3963_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3963_squeeze_mask_0 = const()[name = string("op_3963_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3963_cast_fp16 = slice_by_index(begin = var_3963_begin_0, end = var_3963_end_0, end_mask = var_3963_end_mask_0, squeeze_mask = var_3963_squeeze_mask_0, x = K_full_in)[name = string("op_3963_cast_fp16")]; + tensor K_full_slot_1_axes_0 = const()[name = string("K_full_slot_1_axes_0"), val = tensor([0])]; + tensor K_full_slot_1_cast_fp16 = expand_dims(axes = K_full_slot_1_axes_0, x = var_3963_cast_fp16)[name = string("K_full_slot_1_cast_fp16")]; + tensor var_3968_begin_0 = const()[name = string("op_3968_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3968_end_0 = const()[name = string("op_3968_end_0"), val = tensor([1, 2, 2048, 512])]; + tensor var_3968_end_mask_0 = const()[name = string("op_3968_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3968_squeeze_mask_0 = const()[name = string("op_3968_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_3968_cast_fp16 = slice_by_index(begin = var_3968_begin_0, end = var_3968_end_0, end_mask = var_3968_end_mask_0, squeeze_mask = var_3968_squeeze_mask_0, x = V_full_in)[name = string("op_3968_cast_fp16")]; + tensor 
V_full_slot_1_axes_0 = const()[name = string("V_full_slot_1_axes_0"), val = tensor([0])]; + tensor V_full_slot_1_cast_fp16 = expand_dims(axes = V_full_slot_1_axes_0, x = var_3968_cast_fp16)[name = string("V_full_slot_1_cast_fp16")]; + int32 var_3975 = const()[name = string("op_3975"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3977_cast_fp16 = mul(x = x_99_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3977_cast_fp16")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151_cast_fp16 = concat(axis = var_3975, interleave = input_151_interleave_0, values = (x_99_cast_fp16, var_3977_cast_fp16))[name = string("input_151_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_3972_to_fp16 = const()[name = string("op_3972_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_3972_to_fp16, x = input_151_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_3982_split_sizes_0 = const()[name = string("op_3982_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3982_axis_0 = const()[name = string("op_3982_axis_0"), val = int32(-1)]; + tensor var_3982_cast_fp16_0, tensor var_3982_cast_fp16_1 = split(axis = var_3982_axis_0, split_sizes = var_3982_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_3982_cast_fp16")]; + tensor layers_c2_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929467968)))]; + tensor h_31_cast_fp16 = mul(x = var_3982_cast_fp16_0, y = layers_c2_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_3988 = const()[name = string("op_3988"), val = 
tensor([0, 2, 1])]; + tensor var_3991_axes_0 = const()[name = string("op_3991_axes_0"), val = tensor([2])]; + tensor var_3989_cast_fp16 = transpose(perm = var_3988, x = h_31_cast_fp16)[name = string("transpose_215")]; + tensor var_3991_cast_fp16 = expand_dims(axes = var_3991_axes_0, x = var_3989_cast_fp16)[name = string("op_3991_cast_fp16")]; + string var_4007_pad_type_0 = const()[name = string("op_4007_pad_type_0"), val = string("valid")]; + tensor var_4007_strides_0 = const()[name = string("op_4007_strides_0"), val = tensor([1, 1])]; + tensor var_4007_pad_0 = const()[name = string("op_4007_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4007_dilations_0 = const()[name = string("op_4007_dilations_0"), val = tensor([1, 1])]; + int32 var_4007_groups_0 = const()[name = string("op_4007_groups_0"), val = int32(1)]; + tensor var_4007 = conv(dilations = var_4007_dilations_0, groups = var_4007_groups_0, pad = var_4007_pad_0, pad_type = var_4007_pad_type_0, strides = var_4007_strides_0, weight = layers_c2_5_self_attn_q_proj_weight_palettized, x = var_3991_cast_fp16)[name = string("op_4007")]; + tensor var_4012 = const()[name = string("op_4012"), val = tensor([1, 8, 512, 1])]; + tensor var_4013 = reshape(shape = var_4012, x = var_4007)[name = string("op_4013")]; + tensor var_4018 = const()[name = string("op_4018"), val = tensor([0, 1, 3, 2])]; + tensor var_4028 = const()[name = string("op_4028"), val = tensor([1, 8, 512])]; + tensor var_4019 = transpose(perm = var_4018, x = var_4013)[name = string("transpose_214")]; + tensor x_101 = reshape(shape = var_4028, x = var_4019)[name = string("x_101")]; + int32 var_4034 = const()[name = string("op_4034"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor var_4036 = mul(x = x_101, y = const_61_promoted)[name = string("op_4036")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155 = 
concat(axis = var_4034, interleave = input_155_interleave_0, values = (x_101, var_4036))[name = string("input_155")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_4031_to_fp16 = const()[name = string("op_4031_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_4031_to_fp16, x = input_155)[name = string("normed_145_cast_fp16")]; + tensor var_4041_split_sizes_0 = const()[name = string("op_4041_split_sizes_0"), val = tensor([512, 512])]; + int32 var_4041_axis_0 = const()[name = string("op_4041_axis_0"), val = int32(-1)]; + tensor var_4041_0, tensor var_4041_1 = split(axis = var_4041_axis_0, split_sizes = var_4041_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_4041")]; + tensor var_4043 = mul(x = var_4041_0, y = layers_c2_5_self_attn_q_norm_weight)[name = string("op_4043")]; + tensor var_4048 = const()[name = string("op_4048"), val = tensor([1, 8, 1, 512])]; + tensor q_43 = reshape(shape = var_4048, x = var_4043)[name = string("q_43")]; + tensor var_4050_cast_fp16 = mul(x = q_43, y = cos_f)[name = string("op_4050_cast_fp16")]; + tensor var_4051_split_sizes_0 = const()[name = string("op_4051_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4051_axis_0 = const()[name = string("op_4051_axis_0"), val = int32(-1)]; + tensor var_4051_0, tensor var_4051_1 = split(axis = var_4051_axis_0, split_sizes = var_4051_split_sizes_0, x = q_43)[name = string("op_4051")]; + fp16 const_62_promoted = const()[name = string("const_62_promoted"), val = fp16(-0x1p+0)]; + tensor var_4053 = mul(x = var_4051_1, y = const_62_promoted)[name = string("op_4053")]; + int32 var_4055 = const()[name = string("op_4055"), val = int32(-1)]; + bool var_4056_interleave_0 = const()[name = string("op_4056_interleave_0"), val = bool(false)]; + tensor var_4056 = concat(axis = var_4055, interleave = var_4056_interleave_0, values = (var_4053, var_4051_0))[name = 
string("op_4056")]; + tensor var_4057_cast_fp16 = mul(x = var_4056, y = sin_f)[name = string("op_4057_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_4050_cast_fp16, y = var_4057_cast_fp16)[name = string("q_47_cast_fp16")]; + string var_4070_pad_type_0 = const()[name = string("op_4070_pad_type_0"), val = string("valid")]; + tensor var_4070_strides_0 = const()[name = string("op_4070_strides_0"), val = tensor([1, 1])]; + tensor var_4070_pad_0 = const()[name = string("op_4070_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4070_dilations_0 = const()[name = string("op_4070_dilations_0"), val = tensor([1, 1])]; + int32 var_4070_groups_0 = const()[name = string("op_4070_groups_0"), val = int32(1)]; + tensor var_4070 = conv(dilations = var_4070_dilations_0, groups = var_4070_groups_0, pad = var_4070_pad_0, pad_type = var_4070_pad_type_0, strides = var_4070_strides_0, weight = layers_c2_5_self_attn_k_proj_weight_palettized, x = var_3991_cast_fp16)[name = string("op_4070")]; + tensor var_4075 = const()[name = string("op_4075"), val = tensor([1, 2, 512, 1])]; + tensor var_4076 = reshape(shape = var_4075, x = var_4070)[name = string("op_4076")]; + tensor var_4081 = const()[name = string("op_4081"), val = tensor([0, 1, 3, 2])]; + string var_4098_pad_type_0 = const()[name = string("op_4098_pad_type_0"), val = string("valid")]; + tensor var_4098_strides_0 = const()[name = string("op_4098_strides_0"), val = tensor([1, 1])]; + tensor var_4098_pad_0 = const()[name = string("op_4098_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4098_dilations_0 = const()[name = string("op_4098_dilations_0"), val = tensor([1, 1])]; + int32 var_4098_groups_0 = const()[name = string("op_4098_groups_0"), val = int32(1)]; + tensor var_4098 = conv(dilations = var_4098_dilations_0, groups = var_4098_groups_0, pad = var_4098_pad_0, pad_type = var_4098_pad_type_0, strides = var_4098_strides_0, weight = layers_c2_5_self_attn_v_proj_weight_palettized, x = var_3991_cast_fp16)[name = 
string("op_4098")]; + tensor var_4103 = const()[name = string("op_4103"), val = tensor([1, 2, 512, 1])]; + tensor var_4104 = reshape(shape = var_4103, x = var_4098)[name = string("op_4104")]; + tensor var_4109 = const()[name = string("op_4109"), val = tensor([0, 1, 3, 2])]; + tensor var_4119 = const()[name = string("op_4119"), val = tensor([1, 2, 512])]; + tensor var_4082 = transpose(perm = var_4081, x = var_4076)[name = string("transpose_213")]; + tensor x_103 = reshape(shape = var_4119, x = var_4082)[name = string("x_103")]; + int32 var_4125 = const()[name = string("op_4125"), val = int32(-1)]; + fp16 const_63_promoted = const()[name = string("const_63_promoted"), val = fp16(-0x1p+0)]; + tensor var_4127 = mul(x = x_103, y = const_63_promoted)[name = string("op_4127")]; + bool input_157_interleave_0 = const()[name = string("input_157_interleave_0"), val = bool(false)]; + tensor input_157 = concat(axis = var_4125, interleave = input_157_interleave_0, values = (x_103, var_4127))[name = string("input_157")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_4122_to_fp16 = const()[name = string("op_4122_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_4122_to_fp16, x = input_157)[name = string("normed_149_cast_fp16")]; + tensor var_4132_split_sizes_0 = const()[name = string("op_4132_split_sizes_0"), val = tensor([512, 512])]; + int32 var_4132_axis_0 = const()[name = string("op_4132_axis_0"), val = int32(-1)]; + tensor var_4132_0, tensor var_4132_1 = split(axis = var_4132_axis_0, split_sizes = var_4132_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_4132")]; + tensor var_4134 = mul(x = var_4132_0, y = layers_c2_5_self_attn_k_norm_weight)[name = string("op_4134")]; + tensor var_4139 = const()[name = string("op_4139"), val = tensor([1, 2, 1, 512])]; + tensor q_45 = reshape(shape = var_4139, x = var_4134)[name = string("q_45")]; + fp16 
var_4141_promoted = const()[name = string("op_4141_promoted"), val = fp16(0x1p+1)]; + tensor var_4110 = transpose(perm = var_4109, x = var_4104)[name = string("transpose_212")]; + tensor var_4142 = pow(x = var_4110, y = var_4141_promoted)[name = string("op_4142")]; + tensor var_4147_axes_0 = const()[name = string("op_4147_axes_0"), val = tensor([-1])]; + bool var_4147_keep_dims_0 = const()[name = string("op_4147_keep_dims_0"), val = bool(true)]; + tensor var_4147 = reduce_mean(axes = var_4147_axes_0, keep_dims = var_4147_keep_dims_0, x = var_4142)[name = string("op_4147")]; + fp16 var_4149_to_fp16 = const()[name = string("op_4149_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_11_cast_fp16 = add(x = var_4147, y = var_4149_to_fp16)[name = string("mean_sq_11_cast_fp16")]; + fp32 var_4151_epsilon_0 = const()[name = string("op_4151_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4151_cast_fp16 = rsqrt(epsilon = var_4151_epsilon_0, x = mean_sq_11_cast_fp16)[name = string("op_4151_cast_fp16")]; + tensor v_1_cast_fp16 = mul(x = var_4110, y = var_4151_cast_fp16)[name = string("v_1_cast_fp16")]; + tensor var_4153_cast_fp16 = mul(x = q_45, y = cos_f)[name = string("op_4153_cast_fp16")]; + tensor var_4154_split_sizes_0 = const()[name = string("op_4154_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4154_axis_0 = const()[name = string("op_4154_axis_0"), val = int32(-1)]; + tensor var_4154_0, tensor var_4154_1 = split(axis = var_4154_axis_0, split_sizes = var_4154_split_sizes_0, x = q_45)[name = string("op_4154")]; + fp16 const_64_promoted = const()[name = string("const_64_promoted"), val = fp16(-0x1p+0)]; + tensor var_4156 = mul(x = var_4154_1, y = const_64_promoted)[name = string("op_4156")]; + int32 var_4158 = const()[name = string("op_4158"), val = int32(-1)]; + bool var_4159_interleave_0 = const()[name = string("op_4159_interleave_0"), val = bool(false)]; + tensor var_4159 = concat(axis = var_4158, interleave = var_4159_interleave_0, values = (var_4156, 
var_4154_0))[name = string("op_4159")]; + tensor var_4160_cast_fp16 = mul(x = var_4159, y = sin_f)[name = string("op_4160_cast_fp16")]; + tensor k_13_cast_fp16 = add(x = var_4153_cast_fp16, y = var_4160_cast_fp16)[name = string("k_13_cast_fp16")]; + fp16 var_4163_promoted_to_fp16 = const()[name = string("op_4163_promoted_to_fp16"), val = fp16(0x1p+0)]; + tensor var_4165_cast_fp16 = sub(x = var_4163_promoted_to_fp16, y = update_mask)[name = string("op_4165_cast_fp16")]; + tensor var_4166_cast_fp16 = mul(x = K_full_slot_1_cast_fp16, y = var_4165_cast_fp16)[name = string("op_4166_cast_fp16")]; + tensor var_4167_reps_0 = const()[name = string("op_4167_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_4167_cast_fp16 = tile(reps = var_4167_reps_0, x = k_13_cast_fp16)[name = string("op_4167_cast_fp16")]; + tensor var_4168_cast_fp16 = mul(x = var_4167_cast_fp16, y = update_mask)[name = string("op_4168_cast_fp16")]; + tensor K_full_out_1_cast_fp16 = add(x = var_4166_cast_fp16, y = var_4168_cast_fp16)[name = string("K_full_out_1_cast_fp16")]; + tensor var_4174_cast_fp16 = mul(x = V_full_slot_1_cast_fp16, y = var_4165_cast_fp16)[name = string("op_4174_cast_fp16")]; + tensor var_4175_reps_0 = const()[name = string("op_4175_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_4175_cast_fp16 = tile(reps = var_4175_reps_0, x = v_1_cast_fp16)[name = string("op_4175_cast_fp16")]; + tensor var_4176_cast_fp16 = mul(x = var_4175_cast_fp16, y = update_mask)[name = string("op_4176_cast_fp16")]; + tensor V_full_out_1_cast_fp16 = add(x = var_4174_cast_fp16, y = var_4176_cast_fp16)[name = string("V_full_out_1_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = K_full_out_1_cast_fp16)[name = string("transpose_211")]; + tensor tile_10_cast_fp16 = 
tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_20, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_210")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_21, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = V_full_out_1_cast_fp16)[name = string("transpose_209")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_22, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_208")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_23, x = transpose_23_cast_fp16)[name = 
string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_89_cast_fp16 = transpose(perm = transpose_89_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_207")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_47_cast_fp16, y = transpose_89_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_107_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_107_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_107_cast_fp16)[name = string("reduce_max_5")]; + tensor var_4218 = sub(x = x_107_cast_fp16, y = reduce_max_5)[name = string("op_4218")]; + tensor var_4224 = exp(x = var_4218)[name = string("op_4224")]; + tensor var_4234_axes_0 = const()[name = string("op_4234_axes_0"), val = tensor([-1])]; + bool var_4234_keep_dims_0 = const()[name = string("op_4234_keep_dims_0"), val = bool(true)]; + tensor var_4234 = reduce_sum(axes = var_4234_axes_0, keep_dims = var_4234_keep_dims_0, x = var_4224)[name = string("op_4234")]; + tensor var_4240_cast_fp16 = real_div(x = var_4224, y = var_4234)[name = string("op_4240_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = 
bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_206")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_4240_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_4251 = const()[name = string("op_4251"), val = tensor([0, 2, 1, 3])]; + tensor var_4258 = const()[name = string("op_4258"), val = tensor([1, 1, -1])]; + tensor var_4252_cast_fp16 = transpose(perm = var_4251, x = attn_output_31_cast_fp16)[name = string("transpose_205")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_4258, x = var_4252_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_4263 = const()[name = string("op_4263"), val = tensor([0, 2, 1])]; + string var_4279_pad_type_0 = const()[name = string("op_4279_pad_type_0"), val = string("valid")]; + int32 var_4279_groups_0 = const()[name = string("op_4279_groups_0"), val = int32(1)]; + tensor var_4279_strides_0 = const()[name = string("op_4279_strides_0"), val = tensor([1])]; + tensor var_4279_pad_0 = const()[name = string("op_4279_pad_0"), val = tensor([0, 0])]; + tensor var_4279_dilations_0 = const()[name = string("op_4279_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(929473152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934716096))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4264_cast_fp16 = transpose(perm = var_4263, x = attn_output_33_cast_fp16)[name = string("transpose_204")]; + tensor var_4279_cast_fp16 = conv(dilations = var_4279_dilations_0, groups = var_4279_groups_0, pad = var_4279_pad_0, pad_type = var_4279_pad_type_0, strides = var_4279_strides_0, weight 
= squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4264_cast_fp16)[name = string("op_4279_cast_fp16")]; + tensor var_4283 = const()[name = string("op_4283"), val = tensor([0, 2, 1])]; + int32 var_4289 = const()[name = string("op_4289"), val = int32(-1)]; + fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_111_cast_fp16 = transpose(perm = var_4283, x = var_4279_cast_fp16)[name = string("transpose_203")]; + tensor var_4291_cast_fp16 = mul(x = x_111_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_4291_cast_fp16")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161_cast_fp16 = concat(axis = var_4289, interleave = input_161_interleave_0, values = (x_111_cast_fp16, var_4291_cast_fp16))[name = string("input_161_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_4286_to_fp16 = const()[name = string("op_4286_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_4286_to_fp16, x = input_161_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_4296_split_sizes_0 = const()[name = string("op_4296_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4296_axis_0 = const()[name = string("op_4296_axis_0"), val = int32(-1)]; + tensor var_4296_cast_fp16_0, tensor var_4296_cast_fp16_1 = split(axis = var_4296_axis_0, split_sizes = var_4296_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_4296_cast_fp16")]; + tensor layers_c2_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934718720)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_4296_cast_fp16_0, y = 
layers_c2_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_113_cast_fp16 = add(x = x_99_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_113_cast_fp16")]; + int32 var_4305 = const()[name = string("op_4305"), val = int32(-1)]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4307_cast_fp16 = mul(x = x_113_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_4307_cast_fp16")]; + bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; + tensor input_163_cast_fp16 = concat(axis = var_4305, interleave = input_163_interleave_0, values = (x_113_cast_fp16, var_4307_cast_fp16))[name = string("input_163_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_4302_to_fp16 = const()[name = string("op_4302_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_4302_to_fp16, x = input_163_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_4312_split_sizes_0 = const()[name = string("op_4312_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4312_axis_0 = const()[name = string("op_4312_axis_0"), val = int32(-1)]; + tensor var_4312_cast_fp16_0, tensor var_4312_cast_fp16_1 = split(axis = var_4312_axis_0, split_sizes = var_4312_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_4312_cast_fp16")]; + tensor layers_c2_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934723904)))]; + tensor h_33_cast_fp16 = mul(x = var_4312_cast_fp16_0, y = layers_c2_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_4323 = const()[name = 
string("op_4323"), val = tensor([0, 2, 1])]; + tensor input_165_axes_0 = const()[name = string("input_165_axes_0"), val = tensor([2])]; + tensor var_4324 = transpose(perm = var_4323, x = h_33_cast_fp16)[name = string("transpose_202")]; + tensor input_165 = expand_dims(axes = input_165_axes_0, x = var_4324)[name = string("input_165")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_c2_5_mlp_gate_proj_weight_palettized, x = input_165)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_c2_5_mlp_up_proj_weight_palettized, x = input_165)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_167 = mul(x = 
gate_23, y = up_11)[name = string("input_167")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_c2_5_mlp_down_proj_weight_palettized, x = input_167)[name = string("mlp_out_11")]; + tensor var_4364_axes_0 = const()[name = string("op_4364_axes_0"), val = tensor([2])]; + tensor var_4364 = squeeze(axes = var_4364_axes_0, x = mlp_out_11)[name = string("op_4364")]; + tensor var_4368 = const()[name = string("op_4368"), val = tensor([0, 2, 1])]; + int32 var_4374 = const()[name = string("op_4374"), val = int32(-1)]; + fp16 const_67_promoted = const()[name = string("const_67_promoted"), val = fp16(-0x1p+0)]; + tensor x_115 = transpose(perm = var_4368, x = var_4364)[name = string("transpose_201")]; + tensor var_4376 = mul(x = x_115, y = const_67_promoted)[name = string("op_4376")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169 = concat(axis = var_4374, interleave = input_169_interleave_0, values = (x_115, var_4376))[name = string("input_169")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_4371_to_fp16 = const()[name = string("op_4371_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_4371_to_fp16, x = input_169)[name = 
string("normed_161_cast_fp16")]; + tensor var_4381_split_sizes_0 = const()[name = string("op_4381_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4381_axis_0 = const()[name = string("op_4381_axis_0"), val = int32(-1)]; + tensor var_4381_0, tensor var_4381_1 = split(axis = var_4381_axis_0, split_sizes = var_4381_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_4381")]; + tensor hidden_states_53 = mul(x = var_4381_0, y = layers_c2_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_113_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 4352])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 4608])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_4409 = const()[name = string("op_4409"), val = tensor([0, 2, 1])]; + tensor input_171_axes_0 = const()[name = string("input_171_axes_0"), val = tensor([2])]; + tensor var_4410 = transpose(perm = var_4409, x = hidden_states_55_cast_fp16)[name = string("transpose_200")]; + tensor input_171 = expand_dims(axes = input_171_axes_0, x = var_4410)[name = string("input_171")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = 
string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_c2_5_per_layer_input_gate_weight_palettized, x = input_171)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_4429 = const()[name = string("op_4429"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_4430_cast_fp16 = transpose(perm = var_4429, x = per_layer_slice_11_cast_fp16)[name = string("transpose_199")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_4430_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_173_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_173_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_c2_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934729088))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(935056832))))[name = string("layers_c2_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_c2_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_173_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_4446_axes_0 = const()[name = string("op_4446_axes_0"), val = tensor([2])]; + tensor var_4446_cast_fp16 = squeeze(axes = var_4446_axes_0, x = gated_35_cast_fp16)[name = string("op_4446_cast_fp16")]; + tensor var_4450 = const()[name = string("op_4450"), val = tensor([0, 2, 1])]; + int32 var_4456 = const()[name = string("op_4456"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_117_cast_fp16 = transpose(perm = var_4450, x = var_4446_cast_fp16)[name = string("transpose_198")]; + tensor var_4458_cast_fp16 = mul(x = x_117_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_4458_cast_fp16")]; + bool input_175_interleave_0 = const()[name = string("input_175_interleave_0"), val = bool(false)]; + tensor input_175_cast_fp16 = concat(axis = var_4456, interleave = input_175_interleave_0, values = (x_117_cast_fp16, var_4458_cast_fp16))[name = string("input_175_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_4453_to_fp16 = const()[name = string("op_4453_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_4453_to_fp16, x = input_175_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_4463_split_sizes_0 = const()[name = string("op_4463_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4463_axis_0 = const()[name = string("op_4463_axis_0"), val = 
int32(-1)]; + tensor var_4463_cast_fp16_0, tensor var_4463_cast_fp16_1 = split(axis = var_4463_axis_0, split_sizes = var_4463_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_4463_cast_fp16")]; + tensor layers_c2_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935059456)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_4463_cast_fp16_0, y = layers_c2_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = tensor([0x1.b2p-2])]; + tensor x_119_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_69_promoted_to_fp16)[name = string("x_119_cast_fp16")]; + tensor var_4475_axes_0 = const()[name = string("op_4475_axes_0"), val = tensor([0])]; + tensor var_4475_cast_fp16 = squeeze(axes = var_4475_axes_0, x = K_full_out_1_cast_fp16)[name = string("op_4475_cast_fp16")]; + tensor var_4477_axes_0 = const()[name = string("op_4477_axes_0"), val = tensor([0])]; + tensor var_4477_cast_fp16 = squeeze(axes = var_4477_axes_0, x = V_full_out_1_cast_fp16)[name = string("op_4477_cast_fp16")]; + tensor var_4480_begin_0 = const()[name = string("op_4480_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4480_end_0 = const()[name = string("op_4480_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4480_end_mask_0 = const()[name = string("op_4480_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4480_squeeze_mask_0 = const()[name = string("op_4480_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4480_cast_fp16 = slice_by_index(begin = var_4480_begin_0, end = 
var_4480_end_0, end_mask = var_4480_end_mask_0, squeeze_mask = var_4480_squeeze_mask_0, x = K_sliding_in)[name = string("op_4480_cast_fp16")]; + tensor K_sliding_slot_11_axes_0 = const()[name = string("K_sliding_slot_11_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_11_cast_fp16 = expand_dims(axes = K_sliding_slot_11_axes_0, x = var_4480_cast_fp16)[name = string("K_sliding_slot_11_cast_fp16")]; + tensor var_4485_begin_0 = const()[name = string("op_4485_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4485_end_0 = const()[name = string("op_4485_end_0"), val = tensor([6, 2, 512, 512])]; + tensor var_4485_end_mask_0 = const()[name = string("op_4485_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4485_squeeze_mask_0 = const()[name = string("op_4485_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_4485_cast_fp16 = slice_by_index(begin = var_4485_begin_0, end = var_4485_end_0, end_mask = var_4485_end_mask_0, squeeze_mask = var_4485_squeeze_mask_0, x = V_sliding_in)[name = string("op_4485_cast_fp16")]; + tensor V_sliding_slot_11_axes_0 = const()[name = string("V_sliding_slot_11_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_11_cast_fp16 = expand_dims(axes = V_sliding_slot_11_axes_0, x = var_4485_cast_fp16)[name = string("V_sliding_slot_11_cast_fp16")]; + int32 var_4492 = const()[name = string("op_4492"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4494_cast_fp16 = mul(x = x_119_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_4494_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_4492, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_4494_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = 
tensor([-1])]; + fp16 var_4489_to_fp16 = const()[name = string("op_4489_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_4489_to_fp16, x = input_177_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_4499_split_sizes_0 = const()[name = string("op_4499_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4499_axis_0 = const()[name = string("op_4499_axis_0"), val = int32(-1)]; + tensor var_4499_cast_fp16_0, tensor var_4499_cast_fp16_1 = split(axis = var_4499_axis_0, split_sizes = var_4499_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_4499_cast_fp16")]; + tensor layers_c2_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935064640)))]; + tensor h_37_cast_fp16 = mul(x = var_4499_cast_fp16_0, y = layers_c2_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_4505 = const()[name = string("op_4505"), val = tensor([0, 2, 1])]; + tensor var_4508_axes_0 = const()[name = string("op_4508_axes_0"), val = tensor([2])]; + tensor var_4506_cast_fp16 = transpose(perm = var_4505, x = h_37_cast_fp16)[name = string("transpose_197")]; + tensor var_4508_cast_fp16 = expand_dims(axes = var_4508_axes_0, x = var_4506_cast_fp16)[name = string("op_4508_cast_fp16")]; + string var_4524_pad_type_0 = const()[name = string("op_4524_pad_type_0"), val = string("valid")]; + tensor var_4524_strides_0 = const()[name = string("op_4524_strides_0"), val = tensor([1, 1])]; + tensor var_4524_pad_0 = const()[name = string("op_4524_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4524_dilations_0 = const()[name = string("op_4524_dilations_0"), val = tensor([1, 1])]; + int32 var_4524_groups_0 = const()[name = string("op_4524_groups_0"), val = int32(1)]; + tensor var_4524 = conv(dilations = var_4524_dilations_0, groups 
= var_4524_groups_0, pad = var_4524_pad_0, pad_type = var_4524_pad_type_0, strides = var_4524_strides_0, weight = layers_c2_6_self_attn_q_proj_weight_palettized, x = var_4508_cast_fp16)[name = string("op_4524")]; + tensor var_4529 = const()[name = string("op_4529"), val = tensor([1, 8, 256, 1])]; + tensor var_4530 = reshape(shape = var_4529, x = var_4524)[name = string("op_4530")]; + tensor var_4535 = const()[name = string("op_4535"), val = tensor([0, 1, 3, 2])]; + tensor var_4545 = const()[name = string("op_4545"), val = tensor([1, 8, 256])]; + tensor var_4536 = transpose(perm = var_4535, x = var_4530)[name = string("transpose_196")]; + tensor x_121 = reshape(shape = var_4545, x = var_4536)[name = string("x_121")]; + int32 var_4551 = const()[name = string("op_4551"), val = int32(-1)]; + fp16 const_71_promoted = const()[name = string("const_71_promoted"), val = fp16(-0x1p+0)]; + tensor var_4553 = mul(x = x_121, y = const_71_promoted)[name = string("op_4553")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181 = concat(axis = var_4551, interleave = input_181_interleave_0, values = (x_121, var_4553))[name = string("input_181")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_4548_to_fp16 = const()[name = string("op_4548_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_4548_to_fp16, x = input_181)[name = string("normed_173_cast_fp16")]; + tensor var_4558_split_sizes_0 = const()[name = string("op_4558_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4558_axis_0 = const()[name = string("op_4558_axis_0"), val = int32(-1)]; + tensor var_4558_0, tensor var_4558_1 = split(axis = var_4558_axis_0, split_sizes = var_4558_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_4558")]; + tensor var_4560 = mul(x = var_4558_0, y = layers_c2_2_self_attn_q_norm_weight)[name = 
string("op_4560")]; + tensor var_4565 = const()[name = string("op_4565"), val = tensor([1, 8, 1, 256])]; + tensor q_51 = reshape(shape = var_4565, x = var_4560)[name = string("q_51")]; + tensor var_4567_cast_fp16 = mul(x = q_51, y = cos_s)[name = string("op_4567_cast_fp16")]; + tensor var_4568_split_sizes_0 = const()[name = string("op_4568_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4568_axis_0 = const()[name = string("op_4568_axis_0"), val = int32(-1)]; + tensor var_4568_0, tensor var_4568_1 = split(axis = var_4568_axis_0, split_sizes = var_4568_split_sizes_0, x = q_51)[name = string("op_4568")]; + fp16 const_72_promoted = const()[name = string("const_72_promoted"), val = fp16(-0x1p+0)]; + tensor var_4570 = mul(x = var_4568_1, y = const_72_promoted)[name = string("op_4570")]; + int32 var_4572 = const()[name = string("op_4572"), val = int32(-1)]; + bool var_4573_interleave_0 = const()[name = string("op_4573_interleave_0"), val = bool(false)]; + tensor var_4573 = concat(axis = var_4572, interleave = var_4573_interleave_0, values = (var_4570, var_4568_0))[name = string("op_4573")]; + tensor var_4574_cast_fp16 = mul(x = var_4573, y = sin_s)[name = string("op_4574_cast_fp16")]; + tensor q_55_cast_fp16 = add(x = var_4567_cast_fp16, y = var_4574_cast_fp16)[name = string("q_55_cast_fp16")]; + string var_4587_pad_type_0 = const()[name = string("op_4587_pad_type_0"), val = string("valid")]; + tensor var_4587_strides_0 = const()[name = string("op_4587_strides_0"), val = tensor([1, 1])]; + tensor var_4587_pad_0 = const()[name = string("op_4587_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4587_dilations_0 = const()[name = string("op_4587_dilations_0"), val = tensor([1, 1])]; + int32 var_4587_groups_0 = const()[name = string("op_4587_groups_0"), val = int32(1)]; + tensor var_4587 = conv(dilations = var_4587_dilations_0, groups = var_4587_groups_0, pad = var_4587_pad_0, pad_type = var_4587_pad_type_0, strides = var_4587_strides_0, weight = 
layers_c2_6_self_attn_k_proj_weight_palettized, x = var_4508_cast_fp16)[name = string("op_4587")]; + tensor var_4592 = const()[name = string("op_4592"), val = tensor([1, 2, 256, 1])]; + tensor var_4593 = reshape(shape = var_4592, x = var_4587)[name = string("op_4593")]; + tensor var_4598 = const()[name = string("op_4598"), val = tensor([0, 1, 3, 2])]; + string var_4615_pad_type_0 = const()[name = string("op_4615_pad_type_0"), val = string("valid")]; + tensor var_4615_strides_0 = const()[name = string("op_4615_strides_0"), val = tensor([1, 1])]; + tensor var_4615_pad_0 = const()[name = string("op_4615_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4615_dilations_0 = const()[name = string("op_4615_dilations_0"), val = tensor([1, 1])]; + int32 var_4615_groups_0 = const()[name = string("op_4615_groups_0"), val = int32(1)]; + tensor var_4615 = conv(dilations = var_4615_dilations_0, groups = var_4615_groups_0, pad = var_4615_pad_0, pad_type = var_4615_pad_type_0, strides = var_4615_strides_0, weight = layers_c2_6_self_attn_v_proj_weight_palettized, x = var_4508_cast_fp16)[name = string("op_4615")]; + tensor var_4620 = const()[name = string("op_4620"), val = tensor([1, 2, 256, 1])]; + tensor var_4621 = reshape(shape = var_4620, x = var_4615)[name = string("op_4621")]; + tensor var_4626 = const()[name = string("op_4626"), val = tensor([0, 1, 3, 2])]; + tensor var_4636 = const()[name = string("op_4636"), val = tensor([1, 2, 256])]; + tensor var_4599 = transpose(perm = var_4598, x = var_4593)[name = string("transpose_195")]; + tensor x_123 = reshape(shape = var_4636, x = var_4599)[name = string("x_123")]; + int32 var_4642 = const()[name = string("op_4642"), val = int32(-1)]; + fp16 const_73_promoted = const()[name = string("const_73_promoted"), val = fp16(-0x1p+0)]; + tensor var_4644 = mul(x = x_123, y = const_73_promoted)[name = string("op_4644")]; + bool input_183_interleave_0 = const()[name = string("input_183_interleave_0"), val = bool(false)]; + tensor input_183 = 
concat(axis = var_4642, interleave = input_183_interleave_0, values = (x_123, var_4644))[name = string("input_183")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_4639_to_fp16 = const()[name = string("op_4639_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_4639_to_fp16, x = input_183)[name = string("normed_177_cast_fp16")]; + tensor var_4649_split_sizes_0 = const()[name = string("op_4649_split_sizes_0"), val = tensor([256, 256])]; + int32 var_4649_axis_0 = const()[name = string("op_4649_axis_0"), val = int32(-1)]; + tensor var_4649_0, tensor var_4649_1 = split(axis = var_4649_axis_0, split_sizes = var_4649_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_4649")]; + tensor var_4651 = mul(x = var_4649_0, y = layers_c2_6_self_attn_k_norm_weight)[name = string("op_4651")]; + tensor var_4656 = const()[name = string("op_4656"), val = tensor([1, 2, 1, 256])]; + tensor q_53 = reshape(shape = var_4656, x = var_4651)[name = string("q_53")]; + fp16 var_4658_promoted = const()[name = string("op_4658_promoted"), val = fp16(0x1p+1)]; + tensor var_4627 = transpose(perm = var_4626, x = var_4621)[name = string("transpose_194")]; + tensor var_4659 = pow(x = var_4627, y = var_4658_promoted)[name = string("op_4659")]; + tensor var_4664_axes_0 = const()[name = string("op_4664_axes_0"), val = tensor([-1])]; + bool var_4664_keep_dims_0 = const()[name = string("op_4664_keep_dims_0"), val = bool(true)]; + tensor var_4664 = reduce_mean(axes = var_4664_axes_0, keep_dims = var_4664_keep_dims_0, x = var_4659)[name = string("op_4664")]; + fp16 var_4666_to_fp16 = const()[name = string("op_4666_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_13_cast_fp16 = add(x = var_4664, y = var_4666_to_fp16)[name = string("mean_sq_13_cast_fp16")]; + fp32 var_4668_epsilon_0 = const()[name = string("op_4668_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor 
var_4668_cast_fp16 = rsqrt(epsilon = var_4668_epsilon_0, x = mean_sq_13_cast_fp16)[name = string("op_4668_cast_fp16")]; + tensor input_187_cast_fp16 = mul(x = var_4627, y = var_4668_cast_fp16)[name = string("input_187_cast_fp16")]; + tensor var_4670_cast_fp16 = mul(x = q_53, y = cos_s)[name = string("op_4670_cast_fp16")]; + tensor var_4671_split_sizes_0 = const()[name = string("op_4671_split_sizes_0"), val = tensor([128, 128])]; + int32 var_4671_axis_0 = const()[name = string("op_4671_axis_0"), val = int32(-1)]; + tensor var_4671_0, tensor var_4671_1 = split(axis = var_4671_axis_0, split_sizes = var_4671_split_sizes_0, x = q_53)[name = string("op_4671")]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor var_4673 = mul(x = var_4671_1, y = const_74_promoted)[name = string("op_4673")]; + int32 var_4675 = const()[name = string("op_4675"), val = int32(-1)]; + bool var_4676_interleave_0 = const()[name = string("op_4676_interleave_0"), val = bool(false)]; + tensor var_4676 = concat(axis = var_4675, interleave = var_4676_interleave_0, values = (var_4673, var_4671_0))[name = string("op_4676")]; + tensor var_4677_cast_fp16 = mul(x = var_4676, y = sin_s)[name = string("op_4677_cast_fp16")]; + tensor input_185_cast_fp16 = add(x = var_4670_cast_fp16, y = var_4677_cast_fp16)[name = string("input_185_cast_fp16")]; + tensor k_padded_11_pad_0 = const()[name = string("k_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_11_mode_0 = const()[name = string("k_padded_11_mode_0"), val = string("constant")]; + fp16 const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_11_cast_fp16 = pad(constant_val = const_75_to_fp16, mode = k_padded_11_mode_0, pad = k_padded_11_pad_0, x = input_185_cast_fp16)[name = string("k_padded_11_cast_fp16")]; + tensor v_padded_11_pad_0 = const()[name = string("v_padded_11_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string 
v_padded_11_mode_0 = const()[name = string("v_padded_11_mode_0"), val = string("constant")]; + fp16 const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_11_cast_fp16 = pad(constant_val = const_76_to_fp16, mode = v_padded_11_mode_0, pad = v_padded_11_pad_0, x = input_187_cast_fp16)[name = string("v_padded_11_cast_fp16")]; + tensor var_4706_begin_0 = const()[name = string("op_4706_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4706_end_0 = const()[name = string("op_4706_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4706_end_mask_0 = const()[name = string("op_4706_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4706_cast_fp16 = slice_by_index(begin = var_4706_begin_0, end = var_4706_end_0, end_mask = var_4706_end_mask_0, x = K_sliding_slot_11_cast_fp16)[name = string("op_4706_cast_fp16")]; + int32 var_4713 = const()[name = string("op_4713"), val = int32(2)]; + bool K_sliding_out_11_interleave_0 = const()[name = string("K_sliding_out_11_interleave_0"), val = bool(false)]; + tensor K_sliding_out_11_cast_fp16 = concat(axis = var_4713, interleave = K_sliding_out_11_interleave_0, values = (var_4706_cast_fp16, k_padded_11_cast_fp16))[name = string("K_sliding_out_11_cast_fp16")]; + tensor var_4729_begin_0 = const()[name = string("op_4729_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_4729_end_0 = const()[name = string("op_4729_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_4729_end_mask_0 = const()[name = string("op_4729_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_4729_cast_fp16 = slice_by_index(begin = var_4729_begin_0, end = var_4729_end_0, end_mask = var_4729_end_mask_0, x = V_sliding_slot_11_cast_fp16)[name = string("op_4729_cast_fp16")]; + int32 var_4736 = const()[name = string("op_4736"), val = int32(2)]; + bool V_sliding_out_11_interleave_0 = const()[name = string("V_sliding_out_11_interleave_0"), val = bool(false)]; + tensor 
V_sliding_out_11_cast_fp16 = concat(axis = var_4736, interleave = V_sliding_out_11_interleave_0, values = (var_4729_cast_fp16, v_padded_11_cast_fp16))[name = string("V_sliding_out_11_cast_fp16")]; + tensor K_for_attn_13_begin_0 = const()[name = string("K_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_13_end_0 = const()[name = string("K_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_13_end_mask_0 = const()[name = string("K_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_13_cast_fp16 = slice_by_index(begin = K_for_attn_13_begin_0, end = K_for_attn_13_end_0, end_mask = K_for_attn_13_end_mask_0, x = K_sliding_out_11_cast_fp16)[name = string("K_for_attn_13_cast_fp16")]; + tensor V_for_attn_13_begin_0 = const()[name = string("V_for_attn_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_13_end_0 = const()[name = string("V_for_attn_13_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_13_end_mask_0 = const()[name = string("V_for_attn_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_13_cast_fp16 = slice_by_index(begin = V_for_attn_13_begin_0, end = V_for_attn_13_end_0, end_mask = V_for_attn_13_end_mask_0, x = V_sliding_out_11_cast_fp16)[name = string("V_for_attn_13_cast_fp16")]; + tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_12_reps_0 = const()[name = string("tile_12_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_24_cast_fp16 = transpose(perm = transpose_24_perm_0, x = K_for_attn_13_cast_fp16)[name = string("transpose_193")]; + tensor tile_12_cast_fp16 = tile(reps = tile_12_reps_0, x = transpose_24_cast_fp16)[name = string("tile_12_cast_fp16")]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_24, x = tile_12_cast_fp16)[name = 
string("reshape_24_cast_fp16")]; + tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = reshape_24_cast_fp16)[name = string("transpose_192")]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_25, x = transpose_25_cast_fp16)[name = string("reshape_25_cast_fp16")]; + tensor transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_13_reps_0 = const()[name = string("tile_13_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_26_cast_fp16 = transpose(perm = transpose_26_perm_0, x = V_for_attn_13_cast_fp16)[name = string("transpose_191")]; + tensor tile_13_cast_fp16 = tile(reps = tile_13_reps_0, x = transpose_26_cast_fp16)[name = string("tile_13_cast_fp16")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_26, x = tile_13_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_27_cast_fp16 = transpose(perm = transpose_27_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_190")]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_27, x = transpose_27_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor V_expanded_13_perm_0 = const()[name = string("V_expanded_13_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = 
const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor transpose_90_cast_fp16 = transpose(perm = transpose_90_perm_0, x = reshape_25_cast_fp16)[name = string("transpose_189")]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_55_cast_fp16, y = transpose_90_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_127_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_127_cast_fp16)[name = string("reduce_max_6")]; + tensor var_4777 = sub(x = x_127_cast_fp16, y = reduce_max_6)[name = string("op_4777")]; + tensor var_4783 = exp(x = var_4777)[name = string("op_4783")]; + tensor var_4793_axes_0 = const()[name = string("op_4793_axes_0"), val = tensor([-1])]; + bool var_4793_keep_dims_0 = const()[name = string("op_4793_keep_dims_0"), val = bool(true)]; + tensor var_4793 = reduce_sum(axes = var_4793_axes_0, keep_dims = var_4793_keep_dims_0, x = var_4783)[name = string("op_4793")]; + tensor var_4799_cast_fp16 = real_div(x = var_4783, y = var_4793)[name = string("op_4799_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor V_expanded_13_cast_fp16 = transpose(perm = V_expanded_13_perm_0, x = reshape_27_cast_fp16)[name = string("transpose_188")]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_4799_cast_fp16, 
y = V_expanded_13_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_4810 = const()[name = string("op_4810"), val = tensor([0, 2, 1, 3])]; + tensor var_4817 = const()[name = string("op_4817"), val = tensor([1, 1, -1])]; + tensor var_4811_cast_fp16 = transpose(perm = var_4810, x = attn_output_37_cast_fp16)[name = string("transpose_187")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_4817, x = var_4811_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_4822 = const()[name = string("op_4822"), val = tensor([0, 2, 1])]; + string var_4838_pad_type_0 = const()[name = string("op_4838_pad_type_0"), val = string("valid")]; + int32 var_4838_groups_0 = const()[name = string("op_4838_groups_0"), val = int32(1)]; + tensor var_4838_strides_0 = const()[name = string("op_4838_strides_0"), val = tensor([1])]; + tensor var_4838_pad_0 = const()[name = string("op_4838_pad_0"), val = tensor([0, 0])]; + tensor var_4838_dilations_0 = const()[name = string("op_4838_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935069824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937691328))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4823_cast_fp16 = transpose(perm = var_4822, x = attn_output_39_cast_fp16)[name = string("transpose_186")]; + tensor var_4838_cast_fp16 = conv(dilations = var_4838_dilations_0, groups = var_4838_groups_0, pad = var_4838_pad_0, pad_type = var_4838_pad_type_0, strides = var_4838_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4823_cast_fp16)[name = string("op_4838_cast_fp16")]; + tensor var_4842 = const()[name = string("op_4842"), val = tensor([0, 2, 1])]; + int32 var_4848 = const()[name = string("op_4848"), val = int32(-1)]; + fp16 const_77_promoted_to_fp16 = 
const()[name = string("const_77_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_131_cast_fp16 = transpose(perm = var_4842, x = var_4838_cast_fp16)[name = string("transpose_185")]; + tensor var_4850_cast_fp16 = mul(x = x_131_cast_fp16, y = const_77_promoted_to_fp16)[name = string("op_4850_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_4848, interleave = input_191_interleave_0, values = (x_131_cast_fp16, var_4850_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_4845_to_fp16 = const()[name = string("op_4845_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_4845_to_fp16, x = input_191_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_4855_split_sizes_0 = const()[name = string("op_4855_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4855_axis_0 = const()[name = string("op_4855_axis_0"), val = int32(-1)]; + tensor var_4855_cast_fp16_0, tensor var_4855_cast_fp16_1 = split(axis = var_4855_axis_0, split_sizes = var_4855_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_4855_cast_fp16")]; + tensor layers_c2_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937693952)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_4855_cast_fp16_0, y = layers_c2_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_119_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_133_cast_fp16")]; + int32 var_4864 = const()[name = string("op_4864"), val = int32(-1)]; + fp16 const_78_promoted_to_fp16 = const()[name = 
string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4866_cast_fp16 = mul(x = x_133_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_4866_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_4864, interleave = input_193_interleave_0, values = (x_133_cast_fp16, var_4866_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_4861_to_fp16 = const()[name = string("op_4861_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_4861_to_fp16, x = input_193_cast_fp16)[name = string("normed_185_cast_fp16")]; + tensor var_4871_split_sizes_0 = const()[name = string("op_4871_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4871_axis_0 = const()[name = string("op_4871_axis_0"), val = int32(-1)]; + tensor var_4871_cast_fp16_0, tensor var_4871_cast_fp16_1 = split(axis = var_4871_axis_0, split_sizes = var_4871_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_4871_cast_fp16")]; + tensor layers_c2_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937699136)))]; + tensor h_39_cast_fp16 = mul(x = var_4871_cast_fp16_0, y = layers_c2_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_4882 = const()[name = string("op_4882"), val = tensor([0, 2, 1])]; + tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; + tensor var_4883 = transpose(perm = var_4882, x = h_39_cast_fp16)[name = string("transpose_184")]; + tensor input_195 = expand_dims(axes = input_195_axes_0, x = var_4883)[name = string("input_195")]; + string 
gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_c2_6_mlp_gate_proj_weight_palettized, x = input_195)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_c2_6_mlp_up_proj_weight_palettized, x = input_195)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_197 = mul(x = gate_27, y = up_13)[name = string("input_197")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 
0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_c2_6_mlp_down_proj_weight_palettized, x = input_197)[name = string("mlp_out_13")]; + tensor var_4923_axes_0 = const()[name = string("op_4923_axes_0"), val = tensor([2])]; + tensor var_4923 = squeeze(axes = var_4923_axes_0, x = mlp_out_13)[name = string("op_4923")]; + tensor var_4927 = const()[name = string("op_4927"), val = tensor([0, 2, 1])]; + int32 var_4933 = const()[name = string("op_4933"), val = int32(-1)]; + fp16 const_79_promoted = const()[name = string("const_79_promoted"), val = fp16(-0x1p+0)]; + tensor x_135 = transpose(perm = var_4927, x = var_4923)[name = string("transpose_183")]; + tensor var_4935 = mul(x = x_135, y = const_79_promoted)[name = string("op_4935")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199 = concat(axis = var_4933, interleave = input_199_interleave_0, values = (x_135, var_4935))[name = string("input_199")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_4930_to_fp16 = const()[name = string("op_4930_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_4930_to_fp16, x = input_199)[name = string("normed_189_cast_fp16")]; + tensor var_4940_split_sizes_0 = const()[name = string("op_4940_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_4940_axis_0 = const()[name = string("op_4940_axis_0"), val = int32(-1)]; + tensor var_4940_0, tensor var_4940_1 = split(axis = var_4940_axis_0, split_sizes = var_4940_split_sizes_0, x = 
normed_189_cast_fp16)[name = string("op_4940")]; + tensor hidden_states_63 = mul(x = var_4940_0, y = layers_c2_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_133_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 4608])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 4864])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_4968 = const()[name = string("op_4968"), val = tensor([0, 2, 1])]; + tensor input_201_axes_0 = const()[name = string("input_201_axes_0"), val = tensor([2])]; + tensor var_4969 = transpose(perm = var_4968, x = hidden_states_65_cast_fp16)[name = string("transpose_182")]; + tensor input_201 = expand_dims(axes = input_201_axes_0, x = var_4969)[name = string("input_201")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = 
layers_c2_6_per_layer_input_gate_weight_palettized, x = input_201)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_4988 = const()[name = string("op_4988"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_4989_cast_fp16 = transpose(perm = var_4988, x = per_layer_slice_13_cast_fp16)[name = string("transpose_181")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_4989_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_203_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_203_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_c2_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(937704320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938032064))))[name = string("layers_c2_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = 
layers_c2_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_203_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_5005_axes_0 = const()[name = string("op_5005_axes_0"), val = tensor([2])]; + tensor var_5005_cast_fp16 = squeeze(axes = var_5005_axes_0, x = gated_41_cast_fp16)[name = string("op_5005_cast_fp16")]; + tensor var_5009 = const()[name = string("op_5009"), val = tensor([0, 2, 1])]; + int32 var_5015 = const()[name = string("op_5015"), val = int32(-1)]; + fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_137_cast_fp16 = transpose(perm = var_5009, x = var_5005_cast_fp16)[name = string("transpose_180")]; + tensor var_5017_cast_fp16 = mul(x = x_137_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_5017_cast_fp16")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205_cast_fp16 = concat(axis = var_5015, interleave = input_205_interleave_0, values = (x_137_cast_fp16, var_5017_cast_fp16))[name = string("input_205_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_5012_to_fp16 = const()[name = string("op_5012_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_5012_to_fp16, x = input_205_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_5022_split_sizes_0 = const()[name = string("op_5022_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5022_axis_0 = const()[name = string("op_5022_axis_0"), val = int32(-1)]; + tensor var_5022_cast_fp16_0, tensor var_5022_cast_fp16_1 = split(axis = var_5022_axis_0, split_sizes = var_5022_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_5022_cast_fp16")]; + tensor layers_c2_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_c2_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938034688)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_5022_cast_fp16_0, y = layers_c2_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_81_promoted_to_fp16 = const()[name = string("const_81_promoted_to_fp16"), val = tensor([0x1.16p-1])]; + tensor x_139_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_81_promoted_to_fp16)[name = string("x_139_cast_fp16")]; + tensor var_5034_axes_0 = const()[name = string("op_5034_axes_0"), val = tensor([0])]; + tensor var_5034_cast_fp16 = squeeze(axes = var_5034_axes_0, x = K_sliding_out_11_cast_fp16)[name = string("op_5034_cast_fp16")]; + tensor var_5036_axes_0 = const()[name = string("op_5036_axes_0"), val = tensor([0])]; + tensor var_5036_cast_fp16 = squeeze(axes = var_5036_axes_0, x = V_sliding_out_11_cast_fp16)[name = string("op_5036_cast_fp16")]; + tensor var_5039_begin_0 = const()[name = string("op_5039_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5039_end_0 = const()[name = string("op_5039_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5039_end_mask_0 = const()[name = string("op_5039_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5039_squeeze_mask_0 = const()[name = string("op_5039_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5039_cast_fp16 = slice_by_index(begin = var_5039_begin_0, end = var_5039_end_0, end_mask = var_5039_end_mask_0, squeeze_mask = var_5039_squeeze_mask_0, x = K_sliding_in)[name = string("op_5039_cast_fp16")]; + tensor K_sliding_slot_13_axes_0 = const()[name = string("K_sliding_slot_13_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_13_cast_fp16 
= expand_dims(axes = K_sliding_slot_13_axes_0, x = var_5039_cast_fp16)[name = string("K_sliding_slot_13_cast_fp16")]; + tensor var_5044_begin_0 = const()[name = string("op_5044_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5044_end_0 = const()[name = string("op_5044_end_0"), val = tensor([7, 2, 512, 512])]; + tensor var_5044_end_mask_0 = const()[name = string("op_5044_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5044_squeeze_mask_0 = const()[name = string("op_5044_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5044_cast_fp16 = slice_by_index(begin = var_5044_begin_0, end = var_5044_end_0, end_mask = var_5044_end_mask_0, squeeze_mask = var_5044_squeeze_mask_0, x = V_sliding_in)[name = string("op_5044_cast_fp16")]; + tensor V_sliding_slot_13_axes_0 = const()[name = string("V_sliding_slot_13_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_13_cast_fp16 = expand_dims(axes = V_sliding_slot_13_axes_0, x = var_5044_cast_fp16)[name = string("V_sliding_slot_13_cast_fp16")]; + int32 var_5051 = const()[name = string("op_5051"), val = int32(-1)]; + fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5053_cast_fp16 = mul(x = x_139_cast_fp16, y = const_82_promoted_to_fp16)[name = string("op_5053_cast_fp16")]; + bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; + tensor input_207_cast_fp16 = concat(axis = var_5051, interleave = input_207_interleave_0, values = (x_139_cast_fp16, var_5053_cast_fp16))[name = string("input_207_cast_fp16")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_5048_to_fp16 = const()[name = string("op_5048_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_5048_to_fp16, x = input_207_cast_fp16)[name = string("normed_197_cast_fp16")]; + tensor 
var_5058_split_sizes_0 = const()[name = string("op_5058_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5058_axis_0 = const()[name = string("op_5058_axis_0"), val = int32(-1)]; + tensor var_5058_cast_fp16_0, tensor var_5058_cast_fp16_1 = split(axis = var_5058_axis_0, split_sizes = var_5058_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_5058_cast_fp16")]; + tensor layers_c2_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938039872)))]; + tensor h_43_cast_fp16 = mul(x = var_5058_cast_fp16_0, y = layers_c2_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_5064 = const()[name = string("op_5064"), val = tensor([0, 2, 1])]; + tensor var_5067_axes_0 = const()[name = string("op_5067_axes_0"), val = tensor([2])]; + tensor var_5065_cast_fp16 = transpose(perm = var_5064, x = h_43_cast_fp16)[name = string("transpose_179")]; + tensor var_5067_cast_fp16 = expand_dims(axes = var_5067_axes_0, x = var_5065_cast_fp16)[name = string("op_5067_cast_fp16")]; + string var_5083_pad_type_0 = const()[name = string("op_5083_pad_type_0"), val = string("valid")]; + tensor var_5083_strides_0 = const()[name = string("op_5083_strides_0"), val = tensor([1, 1])]; + tensor var_5083_pad_0 = const()[name = string("op_5083_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5083_dilations_0 = const()[name = string("op_5083_dilations_0"), val = tensor([1, 1])]; + int32 var_5083_groups_0 = const()[name = string("op_5083_groups_0"), val = int32(1)]; + tensor var_5083 = conv(dilations = var_5083_dilations_0, groups = var_5083_groups_0, pad = var_5083_pad_0, pad_type = var_5083_pad_type_0, strides = var_5083_strides_0, weight = layers_c2_7_self_attn_q_proj_weight_palettized, x = var_5067_cast_fp16)[name = string("op_5083")]; + tensor var_5088 = const()[name = string("op_5088"), val = 
tensor([1, 8, 256, 1])]; + tensor var_5089 = reshape(shape = var_5088, x = var_5083)[name = string("op_5089")]; + tensor var_5094 = const()[name = string("op_5094"), val = tensor([0, 1, 3, 2])]; + tensor var_5104 = const()[name = string("op_5104"), val = tensor([1, 8, 256])]; + tensor var_5095 = transpose(perm = var_5094, x = var_5089)[name = string("transpose_178")]; + tensor x_141 = reshape(shape = var_5104, x = var_5095)[name = string("x_141")]; + int32 var_5110 = const()[name = string("op_5110"), val = int32(-1)]; + fp16 const_83_promoted = const()[name = string("const_83_promoted"), val = fp16(-0x1p+0)]; + tensor var_5112 = mul(x = x_141, y = const_83_promoted)[name = string("op_5112")]; + bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; + tensor input_211 = concat(axis = var_5110, interleave = input_211_interleave_0, values = (x_141, var_5112))[name = string("input_211")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_5107_to_fp16 = const()[name = string("op_5107_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_5107_to_fp16, x = input_211)[name = string("normed_201_cast_fp16")]; + tensor var_5117_split_sizes_0 = const()[name = string("op_5117_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5117_axis_0 = const()[name = string("op_5117_axis_0"), val = int32(-1)]; + tensor var_5117_0, tensor var_5117_1 = split(axis = var_5117_axis_0, split_sizes = var_5117_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_5117")]; + tensor var_5119 = mul(x = var_5117_0, y = layers_c2_7_self_attn_q_norm_weight)[name = string("op_5119")]; + tensor var_5124 = const()[name = string("op_5124"), val = tensor([1, 8, 1, 256])]; + tensor q_59 = reshape(shape = var_5124, x = var_5119)[name = string("q_59")]; + tensor var_5126_cast_fp16 = mul(x = q_59, y = cos_s)[name = 
string("op_5126_cast_fp16")]; + tensor var_5127_split_sizes_0 = const()[name = string("op_5127_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5127_axis_0 = const()[name = string("op_5127_axis_0"), val = int32(-1)]; + tensor var_5127_0, tensor var_5127_1 = split(axis = var_5127_axis_0, split_sizes = var_5127_split_sizes_0, x = q_59)[name = string("op_5127")]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_5129 = mul(x = var_5127_1, y = const_84_promoted)[name = string("op_5129")]; + int32 var_5131 = const()[name = string("op_5131"), val = int32(-1)]; + bool var_5132_interleave_0 = const()[name = string("op_5132_interleave_0"), val = bool(false)]; + tensor var_5132 = concat(axis = var_5131, interleave = var_5132_interleave_0, values = (var_5129, var_5127_0))[name = string("op_5132")]; + tensor var_5133_cast_fp16 = mul(x = var_5132, y = sin_s)[name = string("op_5133_cast_fp16")]; + tensor q_63_cast_fp16 = add(x = var_5126_cast_fp16, y = var_5133_cast_fp16)[name = string("q_63_cast_fp16")]; + string var_5146_pad_type_0 = const()[name = string("op_5146_pad_type_0"), val = string("valid")]; + tensor var_5146_strides_0 = const()[name = string("op_5146_strides_0"), val = tensor([1, 1])]; + tensor var_5146_pad_0 = const()[name = string("op_5146_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5146_dilations_0 = const()[name = string("op_5146_dilations_0"), val = tensor([1, 1])]; + int32 var_5146_groups_0 = const()[name = string("op_5146_groups_0"), val = int32(1)]; + tensor var_5146 = conv(dilations = var_5146_dilations_0, groups = var_5146_groups_0, pad = var_5146_pad_0, pad_type = var_5146_pad_type_0, strides = var_5146_strides_0, weight = layers_c2_7_self_attn_k_proj_weight_palettized, x = var_5067_cast_fp16)[name = string("op_5146")]; + tensor var_5151 = const()[name = string("op_5151"), val = tensor([1, 2, 256, 1])]; + tensor var_5152 = reshape(shape = var_5151, x = var_5146)[name = 
string("op_5152")]; + tensor var_5157 = const()[name = string("op_5157"), val = tensor([0, 1, 3, 2])]; + string var_5174_pad_type_0 = const()[name = string("op_5174_pad_type_0"), val = string("valid")]; + tensor var_5174_strides_0 = const()[name = string("op_5174_strides_0"), val = tensor([1, 1])]; + tensor var_5174_pad_0 = const()[name = string("op_5174_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5174_dilations_0 = const()[name = string("op_5174_dilations_0"), val = tensor([1, 1])]; + int32 var_5174_groups_0 = const()[name = string("op_5174_groups_0"), val = int32(1)]; + tensor var_5174 = conv(dilations = var_5174_dilations_0, groups = var_5174_groups_0, pad = var_5174_pad_0, pad_type = var_5174_pad_type_0, strides = var_5174_strides_0, weight = layers_c2_7_self_attn_v_proj_weight_palettized, x = var_5067_cast_fp16)[name = string("op_5174")]; + tensor var_5179 = const()[name = string("op_5179"), val = tensor([1, 2, 256, 1])]; + tensor var_5180 = reshape(shape = var_5179, x = var_5174)[name = string("op_5180")]; + tensor var_5185 = const()[name = string("op_5185"), val = tensor([0, 1, 3, 2])]; + tensor var_5195 = const()[name = string("op_5195"), val = tensor([1, 2, 256])]; + tensor var_5158 = transpose(perm = var_5157, x = var_5152)[name = string("transpose_177")]; + tensor x_143 = reshape(shape = var_5195, x = var_5158)[name = string("x_143")]; + int32 var_5201 = const()[name = string("op_5201"), val = int32(-1)]; + fp16 const_85_promoted = const()[name = string("const_85_promoted"), val = fp16(-0x1p+0)]; + tensor var_5203 = mul(x = x_143, y = const_85_promoted)[name = string("op_5203")]; + bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; + tensor input_213 = concat(axis = var_5201, interleave = input_213_interleave_0, values = (x_143, var_5203))[name = string("input_213")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_5198_to_fp16 = const()[name = 
string("op_5198_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_5198_to_fp16, x = input_213)[name = string("normed_205_cast_fp16")]; + tensor var_5208_split_sizes_0 = const()[name = string("op_5208_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5208_axis_0 = const()[name = string("op_5208_axis_0"), val = int32(-1)]; + tensor var_5208_0, tensor var_5208_1 = split(axis = var_5208_axis_0, split_sizes = var_5208_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_5208")]; + tensor var_5210 = mul(x = var_5208_0, y = layers_c2_7_self_attn_k_norm_weight)[name = string("op_5210")]; + tensor var_5215 = const()[name = string("op_5215"), val = tensor([1, 2, 1, 256])]; + tensor q_61 = reshape(shape = var_5215, x = var_5210)[name = string("q_61")]; + fp16 var_5217_promoted = const()[name = string("op_5217_promoted"), val = fp16(0x1p+1)]; + tensor var_5186 = transpose(perm = var_5185, x = var_5180)[name = string("transpose_176")]; + tensor var_5218 = pow(x = var_5186, y = var_5217_promoted)[name = string("op_5218")]; + tensor var_5223_axes_0 = const()[name = string("op_5223_axes_0"), val = tensor([-1])]; + bool var_5223_keep_dims_0 = const()[name = string("op_5223_keep_dims_0"), val = bool(true)]; + tensor var_5223 = reduce_mean(axes = var_5223_axes_0, keep_dims = var_5223_keep_dims_0, x = var_5218)[name = string("op_5223")]; + fp16 var_5225_to_fp16 = const()[name = string("op_5225_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_15_cast_fp16 = add(x = var_5223, y = var_5225_to_fp16)[name = string("mean_sq_15_cast_fp16")]; + fp32 var_5227_epsilon_0 = const()[name = string("op_5227_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5227_cast_fp16 = rsqrt(epsilon = var_5227_epsilon_0, x = mean_sq_15_cast_fp16)[name = string("op_5227_cast_fp16")]; + tensor input_217_cast_fp16 = mul(x = var_5186, y = var_5227_cast_fp16)[name = string("input_217_cast_fp16")]; + tensor var_5229_cast_fp16 = 
mul(x = q_61, y = cos_s)[name = string("op_5229_cast_fp16")]; + tensor var_5230_split_sizes_0 = const()[name = string("op_5230_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5230_axis_0 = const()[name = string("op_5230_axis_0"), val = int32(-1)]; + tensor var_5230_0, tensor var_5230_1 = split(axis = var_5230_axis_0, split_sizes = var_5230_split_sizes_0, x = q_61)[name = string("op_5230")]; + fp16 const_86_promoted = const()[name = string("const_86_promoted"), val = fp16(-0x1p+0)]; + tensor var_5232 = mul(x = var_5230_1, y = const_86_promoted)[name = string("op_5232")]; + int32 var_5234 = const()[name = string("op_5234"), val = int32(-1)]; + bool var_5235_interleave_0 = const()[name = string("op_5235_interleave_0"), val = bool(false)]; + tensor var_5235 = concat(axis = var_5234, interleave = var_5235_interleave_0, values = (var_5232, var_5230_0))[name = string("op_5235")]; + tensor var_5236_cast_fp16 = mul(x = var_5235, y = sin_s)[name = string("op_5236_cast_fp16")]; + tensor input_215_cast_fp16 = add(x = var_5229_cast_fp16, y = var_5236_cast_fp16)[name = string("input_215_cast_fp16")]; + tensor k_padded_13_pad_0 = const()[name = string("k_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_13_mode_0 = const()[name = string("k_padded_13_mode_0"), val = string("constant")]; + fp16 const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_13_cast_fp16 = pad(constant_val = const_87_to_fp16, mode = k_padded_13_mode_0, pad = k_padded_13_pad_0, x = input_215_cast_fp16)[name = string("k_padded_13_cast_fp16")]; + tensor v_padded_13_pad_0 = const()[name = string("v_padded_13_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_13_mode_0 = const()[name = string("v_padded_13_mode_0"), val = string("constant")]; + fp16 const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_13_cast_fp16 = pad(constant_val = const_88_to_fp16, mode = 
v_padded_13_mode_0, pad = v_padded_13_pad_0, x = input_217_cast_fp16)[name = string("v_padded_13_cast_fp16")]; + tensor var_5265_begin_0 = const()[name = string("op_5265_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5265_end_0 = const()[name = string("op_5265_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5265_end_mask_0 = const()[name = string("op_5265_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5265_cast_fp16 = slice_by_index(begin = var_5265_begin_0, end = var_5265_end_0, end_mask = var_5265_end_mask_0, x = K_sliding_slot_13_cast_fp16)[name = string("op_5265_cast_fp16")]; + int32 var_5272 = const()[name = string("op_5272"), val = int32(2)]; + bool K_sliding_out_13_interleave_0 = const()[name = string("K_sliding_out_13_interleave_0"), val = bool(false)]; + tensor K_sliding_out_13_cast_fp16 = concat(axis = var_5272, interleave = K_sliding_out_13_interleave_0, values = (var_5265_cast_fp16, k_padded_13_cast_fp16))[name = string("K_sliding_out_13_cast_fp16")]; + tensor var_5288_begin_0 = const()[name = string("op_5288_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5288_end_0 = const()[name = string("op_5288_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5288_end_mask_0 = const()[name = string("op_5288_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5288_cast_fp16 = slice_by_index(begin = var_5288_begin_0, end = var_5288_end_0, end_mask = var_5288_end_mask_0, x = V_sliding_slot_13_cast_fp16)[name = string("op_5288_cast_fp16")]; + int32 var_5295 = const()[name = string("op_5295"), val = int32(2)]; + bool V_sliding_out_13_interleave_0 = const()[name = string("V_sliding_out_13_interleave_0"), val = bool(false)]; + tensor V_sliding_out_13_cast_fp16 = concat(axis = var_5295, interleave = V_sliding_out_13_interleave_0, values = (var_5288_cast_fp16, v_padded_13_cast_fp16))[name = string("V_sliding_out_13_cast_fp16")]; + tensor K_for_attn_15_begin_0 = const()[name = string("K_for_attn_15_begin_0"), val = 
tensor([0, 0, 0, 0])]; + tensor K_for_attn_15_end_0 = const()[name = string("K_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_15_end_mask_0 = const()[name = string("K_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_15_cast_fp16 = slice_by_index(begin = K_for_attn_15_begin_0, end = K_for_attn_15_end_0, end_mask = K_for_attn_15_end_mask_0, x = K_sliding_out_13_cast_fp16)[name = string("K_for_attn_15_cast_fp16")]; + tensor V_for_attn_15_begin_0 = const()[name = string("V_for_attn_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_15_end_0 = const()[name = string("V_for_attn_15_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_15_end_mask_0 = const()[name = string("V_for_attn_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_15_cast_fp16 = slice_by_index(begin = V_for_attn_15_begin_0, end = V_for_attn_15_end_0, end_mask = V_for_attn_15_end_mask_0, x = V_sliding_out_13_cast_fp16)[name = string("V_for_attn_15_cast_fp16")]; + tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_14_reps_0 = const()[name = string("tile_14_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_28_cast_fp16 = transpose(perm = transpose_28_perm_0, x = K_for_attn_15_cast_fp16)[name = string("transpose_175")]; + tensor tile_14_cast_fp16 = tile(reps = tile_14_reps_0, x = transpose_28_cast_fp16)[name = string("tile_14_cast_fp16")]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_28, x = tile_14_cast_fp16)[name = string("reshape_28_cast_fp16")]; + tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = 
reshape_28_cast_fp16)[name = string("transpose_174")]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_29, x = transpose_29_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_15_reps_0 = const()[name = string("tile_15_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_30_cast_fp16 = transpose(perm = transpose_30_perm_0, x = V_for_attn_15_cast_fp16)[name = string("transpose_173")]; + tensor tile_15_cast_fp16 = tile(reps = tile_15_reps_0, x = transpose_30_cast_fp16)[name = string("tile_15_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_30, x = tile_15_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_31_cast_fp16 = transpose(perm = transpose_31_perm_0, x = reshape_30_cast_fp16)[name = string("transpose_172")]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_31, x = transpose_31_cast_fp16)[name = string("reshape_31_cast_fp16")]; + tensor V_expanded_15_perm_0 = const()[name = string("V_expanded_15_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor transpose_91_cast_fp16 = transpose(perm = transpose_91_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_171")]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = 
attn_weights_29_transpose_y_0, x = q_63_cast_fp16, y = transpose_91_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_147_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_147_cast_fp16)[name = string("reduce_max_7")]; + tensor var_5336 = sub(x = x_147_cast_fp16, y = reduce_max_7)[name = string("op_5336")]; + tensor var_5342 = exp(x = var_5336)[name = string("op_5342")]; + tensor var_5352_axes_0 = const()[name = string("op_5352_axes_0"), val = tensor([-1])]; + bool var_5352_keep_dims_0 = const()[name = string("op_5352_keep_dims_0"), val = bool(true)]; + tensor var_5352 = reduce_sum(axes = var_5352_axes_0, keep_dims = var_5352_keep_dims_0, x = var_5342)[name = string("op_5352")]; + tensor var_5358_cast_fp16 = real_div(x = var_5342, y = var_5352)[name = string("op_5358_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor V_expanded_15_cast_fp16 = transpose(perm = V_expanded_15_perm_0, x = reshape_31_cast_fp16)[name = string("transpose_170")]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_5358_cast_fp16, y = V_expanded_15_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_5369 = const()[name = string("op_5369"), val = tensor([0, 2, 1, 3])]; + tensor var_5376 = const()[name = string("op_5376"), val = tensor([1, 1, -1])]; + tensor var_5370_cast_fp16 = transpose(perm = var_5369, x = 
attn_output_43_cast_fp16)[name = string("transpose_169")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_5376, x = var_5370_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_5381 = const()[name = string("op_5381"), val = tensor([0, 2, 1])]; + string var_5397_pad_type_0 = const()[name = string("op_5397_pad_type_0"), val = string("valid")]; + int32 var_5397_groups_0 = const()[name = string("op_5397_groups_0"), val = int32(1)]; + tensor var_5397_strides_0 = const()[name = string("op_5397_strides_0"), val = tensor([1])]; + tensor var_5397_pad_0 = const()[name = string("op_5397_pad_0"), val = tensor([0, 0])]; + tensor var_5397_dilations_0 = const()[name = string("op_5397_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938045056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(940666560))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5382_cast_fp16 = transpose(perm = var_5381, x = attn_output_45_cast_fp16)[name = string("transpose_168")]; + tensor var_5397_cast_fp16 = conv(dilations = var_5397_dilations_0, groups = var_5397_groups_0, pad = var_5397_pad_0, pad_type = var_5397_pad_type_0, strides = var_5397_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5382_cast_fp16)[name = string("op_5397_cast_fp16")]; + tensor var_5401 = const()[name = string("op_5401"), val = tensor([0, 2, 1])]; + int32 var_5407 = const()[name = string("op_5407"), val = int32(-1)]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_151_cast_fp16 = transpose(perm = var_5401, x = var_5397_cast_fp16)[name = string("transpose_167")]; + tensor var_5409_cast_fp16 = mul(x = x_151_cast_fp16, y = const_89_promoted_to_fp16)[name = 
string("op_5409_cast_fp16")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221_cast_fp16 = concat(axis = var_5407, interleave = input_221_interleave_0, values = (x_151_cast_fp16, var_5409_cast_fp16))[name = string("input_221_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_5404_to_fp16 = const()[name = string("op_5404_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_5404_to_fp16, x = input_221_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor var_5414_split_sizes_0 = const()[name = string("op_5414_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5414_axis_0 = const()[name = string("op_5414_axis_0"), val = int32(-1)]; + tensor var_5414_cast_fp16_0, tensor var_5414_cast_fp16_1 = split(axis = var_5414_axis_0, split_sizes = var_5414_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_5414_cast_fp16")]; + tensor layers_c2_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(940669184)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_5414_cast_fp16_0, y = layers_c2_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_139_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_153_cast_fp16")]; + int32 var_5423 = const()[name = string("op_5423"), val = int32(-1)]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5425_cast_fp16 = mul(x = x_153_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_5425_cast_fp16")]; + bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = 
bool(false)]; + tensor input_223_cast_fp16 = concat(axis = var_5423, interleave = input_223_interleave_0, values = (x_153_cast_fp16, var_5425_cast_fp16))[name = string("input_223_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_5420_to_fp16 = const()[name = string("op_5420_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_5420_to_fp16, x = input_223_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_5430_split_sizes_0 = const()[name = string("op_5430_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5430_axis_0 = const()[name = string("op_5430_axis_0"), val = int32(-1)]; + tensor var_5430_cast_fp16_0, tensor var_5430_cast_fp16_1 = split(axis = var_5430_axis_0, split_sizes = var_5430_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_5430_cast_fp16")]; + tensor layers_c2_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(940674368)))]; + tensor h_45_cast_fp16 = mul(x = var_5430_cast_fp16_0, y = layers_c2_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_5441 = const()[name = string("op_5441"), val = tensor([0, 2, 1])]; + tensor input_225_axes_0 = const()[name = string("input_225_axes_0"), val = tensor([2])]; + tensor var_5442 = transpose(perm = var_5441, x = h_45_cast_fp16)[name = string("transpose_166")]; + tensor input_225 = expand_dims(axes = input_225_axes_0, x = var_5442)[name = string("input_225")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 
0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_c2_7_mlp_gate_proj_weight_palettized, x = input_225)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_c2_7_mlp_up_proj_weight_palettized, x = input_225)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_227 = mul(x = gate_31, y = up_15)[name = string("input_227")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, 
groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_c2_7_mlp_down_proj_weight_palettized, x = input_227)[name = string("mlp_out_15")]; + tensor var_5482_axes_0 = const()[name = string("op_5482_axes_0"), val = tensor([2])]; + tensor var_5482 = squeeze(axes = var_5482_axes_0, x = mlp_out_15)[name = string("op_5482")]; + tensor var_5486 = const()[name = string("op_5486"), val = tensor([0, 2, 1])]; + int32 var_5492 = const()[name = string("op_5492"), val = int32(-1)]; + fp16 const_91_promoted = const()[name = string("const_91_promoted"), val = fp16(-0x1p+0)]; + tensor x_155 = transpose(perm = var_5486, x = var_5482)[name = string("transpose_165")]; + tensor var_5494 = mul(x = x_155, y = const_91_promoted)[name = string("op_5494")]; + bool input_229_interleave_0 = const()[name = string("input_229_interleave_0"), val = bool(false)]; + tensor input_229 = concat(axis = var_5492, interleave = input_229_interleave_0, values = (x_155, var_5494))[name = string("input_229")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_5489_to_fp16 = const()[name = string("op_5489_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_5489_to_fp16, x = input_229)[name = string("normed_217_cast_fp16")]; + tensor var_5499_split_sizes_0 = const()[name = string("op_5499_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5499_axis_0 = const()[name = string("op_5499_axis_0"), val = int32(-1)]; + tensor var_5499_0, tensor var_5499_1 = split(axis = var_5499_axis_0, split_sizes = var_5499_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_5499")]; + tensor hidden_states_73 = mul(x = var_5499_0, y = layers_c2_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_153_cast_fp16, y = hidden_states_73)[name = 
string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 4864])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 5120])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_5527 = const()[name = string("op_5527"), val = tensor([0, 2, 1])]; + tensor input_231_axes_0 = const()[name = string("input_231_axes_0"), val = tensor([2])]; + tensor var_5528 = transpose(perm = var_5527, x = hidden_states_75_cast_fp16)[name = string("transpose_164")]; + tensor input_231 = expand_dims(axes = input_231_axes_0, x = var_5528)[name = string("input_231")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_c2_7_per_layer_input_gate_weight_palettized, x = input_231)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor 
var_5547 = const()[name = string("op_5547"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_5548_cast_fp16 = transpose(perm = var_5547, x = per_layer_slice_15_cast_fp16)[name = string("transpose_163")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_5548_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_233_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_233_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_c2_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(940679552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(941007296))))[name = string("layers_c2_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_c2_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_233_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_5564_axes_0 = const()[name = string("op_5564_axes_0"), val = tensor([2])]; + tensor var_5564_cast_fp16 = squeeze(axes = var_5564_axes_0, x = gated_47_cast_fp16)[name = 
string("op_5564_cast_fp16")]; + tensor var_5568 = const()[name = string("op_5568"), val = tensor([0, 2, 1])]; + int32 var_5574 = const()[name = string("op_5574"), val = int32(-1)]; + fp16 const_92_promoted_to_fp16 = const()[name = string("const_92_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_157_cast_fp16 = transpose(perm = var_5568, x = var_5564_cast_fp16)[name = string("transpose_162")]; + tensor var_5576_cast_fp16 = mul(x = x_157_cast_fp16, y = const_92_promoted_to_fp16)[name = string("op_5576_cast_fp16")]; + bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)]; + tensor input_235_cast_fp16 = concat(axis = var_5574, interleave = input_235_interleave_0, values = (x_157_cast_fp16, var_5576_cast_fp16))[name = string("input_235_cast_fp16")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_5571_to_fp16 = const()[name = string("op_5571_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_5571_to_fp16, x = input_235_cast_fp16)[name = string("normed_221_cast_fp16")]; + tensor var_5581_split_sizes_0 = const()[name = string("op_5581_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5581_axis_0 = const()[name = string("op_5581_axis_0"), val = int32(-1)]; + tensor var_5581_cast_fp16_0, tensor var_5581_cast_fp16_1 = split(axis = var_5581_axis_0, split_sizes = var_5581_split_sizes_0, x = normed_221_cast_fp16)[name = string("op_5581_cast_fp16")]; + tensor layers_c2_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(941009920)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_5581_cast_fp16_0, y = layers_c2_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor 
hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = tensor([0x1.06p-1])]; + tensor x_159_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_93_promoted_to_fp16)[name = string("x_159_cast_fp16")]; + tensor var_5593_axes_0 = const()[name = string("op_5593_axes_0"), val = tensor([0])]; + tensor var_5593_cast_fp16 = squeeze(axes = var_5593_axes_0, x = K_sliding_out_13_cast_fp16)[name = string("op_5593_cast_fp16")]; + tensor var_5595_axes_0 = const()[name = string("op_5595_axes_0"), val = tensor([0])]; + tensor var_5595_cast_fp16 = squeeze(axes = var_5595_axes_0, x = V_sliding_out_13_cast_fp16)[name = string("op_5595_cast_fp16")]; + tensor var_5598_begin_0 = const()[name = string("op_5598_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5598_end_0 = const()[name = string("op_5598_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5598_end_mask_0 = const()[name = string("op_5598_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5598_squeeze_mask_0 = const()[name = string("op_5598_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5598_cast_fp16 = slice_by_index(begin = var_5598_begin_0, end = var_5598_end_0, end_mask = var_5598_end_mask_0, squeeze_mask = var_5598_squeeze_mask_0, x = K_sliding_in)[name = string("op_5598_cast_fp16")]; + tensor K_sliding_slot_15_axes_0 = const()[name = string("K_sliding_slot_15_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_15_cast_fp16 = expand_dims(axes = K_sliding_slot_15_axes_0, x = var_5598_cast_fp16)[name = string("K_sliding_slot_15_cast_fp16")]; + tensor var_5603_begin_0 = const()[name = string("op_5603_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5603_end_0 = const()[name = string("op_5603_end_0"), val = tensor([8, 2, 512, 512])]; + tensor var_5603_end_mask_0 = const()[name = 
string("op_5603_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5603_squeeze_mask_0 = const()[name = string("op_5603_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_5603_cast_fp16 = slice_by_index(begin = var_5603_begin_0, end = var_5603_end_0, end_mask = var_5603_end_mask_0, squeeze_mask = var_5603_squeeze_mask_0, x = V_sliding_in)[name = string("op_5603_cast_fp16")]; + tensor V_sliding_slot_15_axes_0 = const()[name = string("V_sliding_slot_15_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_15_cast_fp16 = expand_dims(axes = V_sliding_slot_15_axes_0, x = var_5603_cast_fp16)[name = string("V_sliding_slot_15_cast_fp16")]; + int32 var_5610 = const()[name = string("op_5610"), val = int32(-1)]; + fp16 const_94_promoted_to_fp16 = const()[name = string("const_94_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5612_cast_fp16 = mul(x = x_159_cast_fp16, y = const_94_promoted_to_fp16)[name = string("op_5612_cast_fp16")]; + bool input_237_interleave_0 = const()[name = string("input_237_interleave_0"), val = bool(false)]; + tensor input_237_cast_fp16 = concat(axis = var_5610, interleave = input_237_interleave_0, values = (x_159_cast_fp16, var_5612_cast_fp16))[name = string("input_237_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_5607_to_fp16 = const()[name = string("op_5607_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_5607_to_fp16, x = input_237_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor var_5617_split_sizes_0 = const()[name = string("op_5617_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5617_axis_0 = const()[name = string("op_5617_axis_0"), val = int32(-1)]; + tensor var_5617_cast_fp16_0, tensor var_5617_cast_fp16_1 = split(axis = var_5617_axis_0, split_sizes = var_5617_split_sizes_0, x = normed_225_cast_fp16)[name = 
string("op_5617_cast_fp16")]; + tensor layers_c2_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(941015104)))]; + tensor h_49_cast_fp16 = mul(x = var_5617_cast_fp16_0, y = layers_c2_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_5623 = const()[name = string("op_5623"), val = tensor([0, 2, 1])]; + tensor var_5626_axes_0 = const()[name = string("op_5626_axes_0"), val = tensor([2])]; + tensor var_5624_cast_fp16 = transpose(perm = var_5623, x = h_49_cast_fp16)[name = string("transpose_161")]; + tensor var_5626_cast_fp16 = expand_dims(axes = var_5626_axes_0, x = var_5624_cast_fp16)[name = string("op_5626_cast_fp16")]; + string var_5642_pad_type_0 = const()[name = string("op_5642_pad_type_0"), val = string("valid")]; + tensor var_5642_strides_0 = const()[name = string("op_5642_strides_0"), val = tensor([1, 1])]; + tensor var_5642_pad_0 = const()[name = string("op_5642_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5642_dilations_0 = const()[name = string("op_5642_dilations_0"), val = tensor([1, 1])]; + int32 var_5642_groups_0 = const()[name = string("op_5642_groups_0"), val = int32(1)]; + tensor var_5642 = conv(dilations = var_5642_dilations_0, groups = var_5642_groups_0, pad = var_5642_pad_0, pad_type = var_5642_pad_type_0, strides = var_5642_strides_0, weight = layers_c2_8_self_attn_q_proj_weight_palettized, x = var_5626_cast_fp16)[name = string("op_5642")]; + tensor var_5647 = const()[name = string("op_5647"), val = tensor([1, 8, 256, 1])]; + tensor var_5648 = reshape(shape = var_5647, x = var_5642)[name = string("op_5648")]; + tensor var_5653 = const()[name = string("op_5653"), val = tensor([0, 1, 3, 2])]; + tensor var_5663 = const()[name = string("op_5663"), val = tensor([1, 8, 256])]; + tensor var_5654 = transpose(perm = var_5653, x = var_5648)[name = 
string("transpose_160")]; + tensor x_161 = reshape(shape = var_5663, x = var_5654)[name = string("x_161")]; + int32 var_5669 = const()[name = string("op_5669"), val = int32(-1)]; + fp16 const_95_promoted = const()[name = string("const_95_promoted"), val = fp16(-0x1p+0)]; + tensor var_5671 = mul(x = x_161, y = const_95_promoted)[name = string("op_5671")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241 = concat(axis = var_5669, interleave = input_241_interleave_0, values = (x_161, var_5671))[name = string("input_241")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_5666_to_fp16 = const()[name = string("op_5666_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_5666_to_fp16, x = input_241)[name = string("normed_229_cast_fp16")]; + tensor var_5676_split_sizes_0 = const()[name = string("op_5676_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5676_axis_0 = const()[name = string("op_5676_axis_0"), val = int32(-1)]; + tensor var_5676_0, tensor var_5676_1 = split(axis = var_5676_axis_0, split_sizes = var_5676_split_sizes_0, x = normed_229_cast_fp16)[name = string("op_5676")]; + tensor var_5683 = const()[name = string("op_5683"), val = tensor([1, 8, 1, 256])]; + tensor q_67 = reshape(shape = var_5683, x = var_5676_0)[name = string("q_67")]; + tensor var_5685_cast_fp16 = mul(x = q_67, y = cos_s)[name = string("op_5685_cast_fp16")]; + tensor var_5686_split_sizes_0 = const()[name = string("op_5686_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5686_axis_0 = const()[name = string("op_5686_axis_0"), val = int32(-1)]; + tensor var_5686_0, tensor var_5686_1 = split(axis = var_5686_axis_0, split_sizes = var_5686_split_sizes_0, x = q_67)[name = string("op_5686")]; + fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; + tensor 
var_5688 = mul(x = var_5686_1, y = const_96_promoted)[name = string("op_5688")]; + int32 var_5690 = const()[name = string("op_5690"), val = int32(-1)]; + bool var_5691_interleave_0 = const()[name = string("op_5691_interleave_0"), val = bool(false)]; + tensor var_5691 = concat(axis = var_5690, interleave = var_5691_interleave_0, values = (var_5688, var_5686_0))[name = string("op_5691")]; + tensor var_5692_cast_fp16 = mul(x = var_5691, y = sin_s)[name = string("op_5692_cast_fp16")]; + tensor q_71_cast_fp16 = add(x = var_5685_cast_fp16, y = var_5692_cast_fp16)[name = string("q_71_cast_fp16")]; + string var_5705_pad_type_0 = const()[name = string("op_5705_pad_type_0"), val = string("valid")]; + tensor var_5705_strides_0 = const()[name = string("op_5705_strides_0"), val = tensor([1, 1])]; + tensor var_5705_pad_0 = const()[name = string("op_5705_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5705_dilations_0 = const()[name = string("op_5705_dilations_0"), val = tensor([1, 1])]; + int32 var_5705_groups_0 = const()[name = string("op_5705_groups_0"), val = int32(1)]; + tensor var_5705 = conv(dilations = var_5705_dilations_0, groups = var_5705_groups_0, pad = var_5705_pad_0, pad_type = var_5705_pad_type_0, strides = var_5705_strides_0, weight = layers_c2_8_self_attn_k_proj_weight_palettized, x = var_5626_cast_fp16)[name = string("op_5705")]; + tensor var_5710 = const()[name = string("op_5710"), val = tensor([1, 2, 256, 1])]; + tensor var_5711 = reshape(shape = var_5710, x = var_5705)[name = string("op_5711")]; + tensor var_5716 = const()[name = string("op_5716"), val = tensor([0, 1, 3, 2])]; + string var_5733_pad_type_0 = const()[name = string("op_5733_pad_type_0"), val = string("valid")]; + tensor var_5733_strides_0 = const()[name = string("op_5733_strides_0"), val = tensor([1, 1])]; + tensor var_5733_pad_0 = const()[name = string("op_5733_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5733_dilations_0 = const()[name = string("op_5733_dilations_0"), val = tensor([1, 
1])]; + int32 var_5733_groups_0 = const()[name = string("op_5733_groups_0"), val = int32(1)]; + tensor var_5733 = conv(dilations = var_5733_dilations_0, groups = var_5733_groups_0, pad = var_5733_pad_0, pad_type = var_5733_pad_type_0, strides = var_5733_strides_0, weight = layers_c2_8_self_attn_v_proj_weight_palettized, x = var_5626_cast_fp16)[name = string("op_5733")]; + tensor var_5738 = const()[name = string("op_5738"), val = tensor([1, 2, 256, 1])]; + tensor var_5739 = reshape(shape = var_5738, x = var_5733)[name = string("op_5739")]; + tensor var_5744 = const()[name = string("op_5744"), val = tensor([0, 1, 3, 2])]; + tensor var_5754 = const()[name = string("op_5754"), val = tensor([1, 2, 256])]; + tensor var_5717 = transpose(perm = var_5716, x = var_5711)[name = string("transpose_159")]; + tensor x_163 = reshape(shape = var_5754, x = var_5717)[name = string("x_163")]; + int32 var_5760 = const()[name = string("op_5760"), val = int32(-1)]; + fp16 const_97_promoted = const()[name = string("const_97_promoted"), val = fp16(-0x1p+0)]; + tensor var_5762 = mul(x = x_163, y = const_97_promoted)[name = string("op_5762")]; + bool input_243_interleave_0 = const()[name = string("input_243_interleave_0"), val = bool(false)]; + tensor input_243 = concat(axis = var_5760, interleave = input_243_interleave_0, values = (x_163, var_5762))[name = string("input_243")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_5757_to_fp16 = const()[name = string("op_5757_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_5757_to_fp16, x = input_243)[name = string("normed_233_cast_fp16")]; + tensor var_5767_split_sizes_0 = const()[name = string("op_5767_split_sizes_0"), val = tensor([256, 256])]; + int32 var_5767_axis_0 = const()[name = string("op_5767_axis_0"), val = int32(-1)]; + tensor var_5767_0, tensor var_5767_1 = split(axis = var_5767_axis_0, split_sizes = 
var_5767_split_sizes_0, x = normed_233_cast_fp16)[name = string("op_5767")]; + tensor var_5769 = mul(x = var_5767_0, y = layers_c2_8_self_attn_k_norm_weight)[name = string("op_5769")]; + tensor var_5774 = const()[name = string("op_5774"), val = tensor([1, 2, 1, 256])]; + tensor q_69 = reshape(shape = var_5774, x = var_5769)[name = string("q_69")]; + fp16 var_5776_promoted = const()[name = string("op_5776_promoted"), val = fp16(0x1p+1)]; + tensor var_5745 = transpose(perm = var_5744, x = var_5739)[name = string("transpose_158")]; + tensor var_5777 = pow(x = var_5745, y = var_5776_promoted)[name = string("op_5777")]; + tensor var_5782_axes_0 = const()[name = string("op_5782_axes_0"), val = tensor([-1])]; + bool var_5782_keep_dims_0 = const()[name = string("op_5782_keep_dims_0"), val = bool(true)]; + tensor var_5782 = reduce_mean(axes = var_5782_axes_0, keep_dims = var_5782_keep_dims_0, x = var_5777)[name = string("op_5782")]; + fp16 var_5784_to_fp16 = const()[name = string("op_5784_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_17_cast_fp16 = add(x = var_5782, y = var_5784_to_fp16)[name = string("mean_sq_17_cast_fp16")]; + fp32 var_5786_epsilon_0 = const()[name = string("op_5786_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5786_cast_fp16 = rsqrt(epsilon = var_5786_epsilon_0, x = mean_sq_17_cast_fp16)[name = string("op_5786_cast_fp16")]; + tensor input_247_cast_fp16 = mul(x = var_5745, y = var_5786_cast_fp16)[name = string("input_247_cast_fp16")]; + tensor var_5788_cast_fp16 = mul(x = q_69, y = cos_s)[name = string("op_5788_cast_fp16")]; + tensor var_5789_split_sizes_0 = const()[name = string("op_5789_split_sizes_0"), val = tensor([128, 128])]; + int32 var_5789_axis_0 = const()[name = string("op_5789_axis_0"), val = int32(-1)]; + tensor var_5789_0, tensor var_5789_1 = split(axis = var_5789_axis_0, split_sizes = var_5789_split_sizes_0, x = q_69)[name = string("op_5789")]; + fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = 
fp16(-0x1p+0)]; + tensor var_5791 = mul(x = var_5789_1, y = const_98_promoted)[name = string("op_5791")]; + int32 var_5793 = const()[name = string("op_5793"), val = int32(-1)]; + bool var_5794_interleave_0 = const()[name = string("op_5794_interleave_0"), val = bool(false)]; + tensor var_5794 = concat(axis = var_5793, interleave = var_5794_interleave_0, values = (var_5791, var_5789_0))[name = string("op_5794")]; + tensor var_5795_cast_fp16 = mul(x = var_5794, y = sin_s)[name = string("op_5795_cast_fp16")]; + tensor input_245_cast_fp16 = add(x = var_5788_cast_fp16, y = var_5795_cast_fp16)[name = string("input_245_cast_fp16")]; + tensor k_padded_15_pad_0 = const()[name = string("k_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_15_mode_0 = const()[name = string("k_padded_15_mode_0"), val = string("constant")]; + fp16 const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_15_cast_fp16 = pad(constant_val = const_99_to_fp16, mode = k_padded_15_mode_0, pad = k_padded_15_pad_0, x = input_245_cast_fp16)[name = string("k_padded_15_cast_fp16")]; + tensor v_padded_15_pad_0 = const()[name = string("v_padded_15_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_15_mode_0 = const()[name = string("v_padded_15_mode_0"), val = string("constant")]; + fp16 const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_15_cast_fp16 = pad(constant_val = const_100_to_fp16, mode = v_padded_15_mode_0, pad = v_padded_15_pad_0, x = input_247_cast_fp16)[name = string("v_padded_15_cast_fp16")]; + tensor var_5824_begin_0 = const()[name = string("op_5824_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5824_end_0 = const()[name = string("op_5824_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5824_end_mask_0 = const()[name = string("op_5824_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5824_cast_fp16 = slice_by_index(begin = 
var_5824_begin_0, end = var_5824_end_0, end_mask = var_5824_end_mask_0, x = K_sliding_slot_15_cast_fp16)[name = string("op_5824_cast_fp16")]; + int32 var_5831 = const()[name = string("op_5831"), val = int32(2)]; + bool K_sliding_out_15_interleave_0 = const()[name = string("K_sliding_out_15_interleave_0"), val = bool(false)]; + tensor K_sliding_out_15_cast_fp16 = concat(axis = var_5831, interleave = K_sliding_out_15_interleave_0, values = (var_5824_cast_fp16, k_padded_15_cast_fp16))[name = string("K_sliding_out_15_cast_fp16")]; + tensor var_5847_begin_0 = const()[name = string("op_5847_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_5847_end_0 = const()[name = string("op_5847_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_5847_end_mask_0 = const()[name = string("op_5847_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_5847_cast_fp16 = slice_by_index(begin = var_5847_begin_0, end = var_5847_end_0, end_mask = var_5847_end_mask_0, x = V_sliding_slot_15_cast_fp16)[name = string("op_5847_cast_fp16")]; + int32 var_5854 = const()[name = string("op_5854"), val = int32(2)]; + bool V_sliding_out_15_interleave_0 = const()[name = string("V_sliding_out_15_interleave_0"), val = bool(false)]; + tensor V_sliding_out_15_cast_fp16 = concat(axis = var_5854, interleave = V_sliding_out_15_interleave_0, values = (var_5847_cast_fp16, v_padded_15_cast_fp16))[name = string("V_sliding_out_15_cast_fp16")]; + tensor K_for_attn_17_begin_0 = const()[name = string("K_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_17_end_0 = const()[name = string("K_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_17_end_mask_0 = const()[name = string("K_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_17_cast_fp16 = slice_by_index(begin = K_for_attn_17_begin_0, end = K_for_attn_17_end_0, end_mask = K_for_attn_17_end_mask_0, x = K_sliding_out_15_cast_fp16)[name = 
string("K_for_attn_17_cast_fp16")]; + tensor V_for_attn_17_begin_0 = const()[name = string("V_for_attn_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_17_end_0 = const()[name = string("V_for_attn_17_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_17_end_mask_0 = const()[name = string("V_for_attn_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor V_for_attn_17_cast_fp16 = slice_by_index(begin = V_for_attn_17_begin_0, end = V_for_attn_17_end_0, end_mask = V_for_attn_17_end_mask_0, x = V_sliding_out_15_cast_fp16)[name = string("V_for_attn_17_cast_fp16")]; + tensor transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_16_reps_0 = const()[name = string("tile_16_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_32_cast_fp16 = transpose(perm = transpose_32_perm_0, x = K_for_attn_17_cast_fp16)[name = string("transpose_157")]; + tensor tile_16_cast_fp16 = tile(reps = tile_16_reps_0, x = transpose_32_cast_fp16)[name = string("tile_16_cast_fp16")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_32, x = tile_16_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_33 = const()[name = string("concat_33"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_156")]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_33, x = transpose_33_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_17_reps_0 = const()[name = 
string("tile_17_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_34_cast_fp16 = transpose(perm = transpose_34_perm_0, x = V_for_attn_17_cast_fp16)[name = string("transpose_155")]; + tensor tile_17_cast_fp16 = tile(reps = tile_17_reps_0, x = transpose_34_cast_fp16)[name = string("tile_17_cast_fp16")]; + tensor concat_34 = const()[name = string("concat_34"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_34, x = tile_17_cast_fp16)[name = string("reshape_34_cast_fp16")]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_35_cast_fp16 = transpose(perm = transpose_35_perm_0, x = reshape_34_cast_fp16)[name = string("transpose_154")]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_35, x = transpose_35_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor V_expanded_17_perm_0 = const()[name = string("V_expanded_17_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor transpose_92_cast_fp16 = transpose(perm = transpose_92_perm_0, x = reshape_33_cast_fp16)[name = string("transpose_153")]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_71_cast_fp16, y = transpose_92_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_167_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_167_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = 
bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_167_cast_fp16)[name = string("reduce_max_8")]; + tensor var_5895 = sub(x = x_167_cast_fp16, y = reduce_max_8)[name = string("op_5895")]; + tensor var_5901 = exp(x = var_5895)[name = string("op_5901")]; + tensor var_5911_axes_0 = const()[name = string("op_5911_axes_0"), val = tensor([-1])]; + bool var_5911_keep_dims_0 = const()[name = string("op_5911_keep_dims_0"), val = bool(true)]; + tensor var_5911 = reduce_sum(axes = var_5911_axes_0, keep_dims = var_5911_keep_dims_0, x = var_5901)[name = string("op_5911")]; + tensor var_5917_cast_fp16 = real_div(x = var_5901, y = var_5911)[name = string("op_5917_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor V_expanded_17_cast_fp16 = transpose(perm = V_expanded_17_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_152")]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_5917_cast_fp16, y = V_expanded_17_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_5928 = const()[name = string("op_5928"), val = tensor([0, 2, 1, 3])]; + tensor var_5935 = const()[name = string("op_5935"), val = tensor([1, 1, -1])]; + tensor var_5929_cast_fp16 = transpose(perm = var_5928, x = attn_output_49_cast_fp16)[name = string("transpose_151")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_5935, x = var_5929_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_5940 = const()[name = string("op_5940"), val = tensor([0, 2, 1])]; + string var_5956_pad_type_0 = const()[name = string("op_5956_pad_type_0"), val = string("valid")]; + int32 var_5956_groups_0 = const()[name = string("op_5956_groups_0"), val = 
int32(1)]; + tensor var_5956_strides_0 = const()[name = string("op_5956_strides_0"), val = tensor([1])]; + tensor var_5956_pad_0 = const()[name = string("op_5956_pad_0"), val = tensor([0, 0])]; + tensor var_5956_dilations_0 = const()[name = string("op_5956_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(941020288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943641792))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5941_cast_fp16 = transpose(perm = var_5940, x = attn_output_51_cast_fp16)[name = string("transpose_150")]; + tensor var_5956_cast_fp16 = conv(dilations = var_5956_dilations_0, groups = var_5956_groups_0, pad = var_5956_pad_0, pad_type = var_5956_pad_type_0, strides = var_5956_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5941_cast_fp16)[name = string("op_5956_cast_fp16")]; + tensor var_5960 = const()[name = string("op_5960"), val = tensor([0, 2, 1])]; + int32 var_5966 = const()[name = string("op_5966"), val = int32(-1)]; + fp16 const_101_promoted_to_fp16 = const()[name = string("const_101_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_171_cast_fp16 = transpose(perm = var_5960, x = var_5956_cast_fp16)[name = string("transpose_149")]; + tensor var_5968_cast_fp16 = mul(x = x_171_cast_fp16, y = const_101_promoted_to_fp16)[name = string("op_5968_cast_fp16")]; + bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; + tensor input_251_cast_fp16 = concat(axis = var_5966, interleave = input_251_interleave_0, values = (x_171_cast_fp16, var_5968_cast_fp16))[name = string("input_251_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_5963_to_fp16 = const()[name = 
string("op_5963_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_5963_to_fp16, x = input_251_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor var_5973_split_sizes_0 = const()[name = string("op_5973_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5973_axis_0 = const()[name = string("op_5973_axis_0"), val = int32(-1)]; + tensor var_5973_cast_fp16_0, tensor var_5973_cast_fp16_1 = split(axis = var_5973_axis_0, split_sizes = var_5973_split_sizes_0, x = normed_237_cast_fp16)[name = string("op_5973_cast_fp16")]; + tensor layers_c2_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943644416)))]; + tensor attn_output_53_cast_fp16 = mul(x = var_5973_cast_fp16_0, y = layers_c2_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor x_173_cast_fp16 = add(x = x_159_cast_fp16, y = attn_output_53_cast_fp16)[name = string("x_173_cast_fp16")]; + int32 var_5982 = const()[name = string("op_5982"), val = int32(-1)]; + fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5984_cast_fp16 = mul(x = x_173_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_5984_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_5982, interleave = input_253_interleave_0, values = (x_173_cast_fp16, var_5984_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_5979_to_fp16 = const()[name = string("op_5979_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, 
epsilon = var_5979_to_fp16, x = input_253_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor var_5989_split_sizes_0 = const()[name = string("op_5989_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_5989_axis_0 = const()[name = string("op_5989_axis_0"), val = int32(-1)]; + tensor var_5989_cast_fp16_0, tensor var_5989_cast_fp16_1 = split(axis = var_5989_axis_0, split_sizes = var_5989_split_sizes_0, x = normed_241_cast_fp16)[name = string("op_5989_cast_fp16")]; + tensor layers_c2_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943649600)))]; + tensor h_51_cast_fp16 = mul(x = var_5989_cast_fp16_0, y = layers_c2_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_6000 = const()[name = string("op_6000"), val = tensor([0, 2, 1])]; + tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; + tensor var_6001 = transpose(perm = var_6000, x = h_51_cast_fp16)[name = string("transpose_148")]; + tensor input_255 = expand_dims(axes = input_255_axes_0, x = var_6001)[name = string("input_255")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_c2_8_mlp_gate_proj_weight_palettized, x = input_255)[name = 
string("gate_33")]; + string up_17_pad_type_0 = const()[name = string("up_17_pad_type_0"), val = string("valid")]; + tensor up_17_strides_0 = const()[name = string("up_17_strides_0"), val = tensor([1, 1])]; + tensor up_17_pad_0 = const()[name = string("up_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_17_dilations_0 = const()[name = string("up_17_dilations_0"), val = tensor([1, 1])]; + int32 up_17_groups_0 = const()[name = string("up_17_groups_0"), val = int32(1)]; + tensor up_17 = conv(dilations = up_17_dilations_0, groups = up_17_groups_0, pad = up_17_pad_0, pad_type = up_17_pad_type_0, strides = up_17_strides_0, weight = layers_c2_8_mlp_up_proj_weight_palettized, x = input_255)[name = string("up_17")]; + string gate_35_mode_0 = const()[name = string("gate_35_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_35 = gelu(mode = gate_35_mode_0, x = gate_33)[name = string("gate_35")]; + tensor input_257 = mul(x = gate_35, y = up_17)[name = string("input_257")]; + string mlp_out_17_pad_type_0 = const()[name = string("mlp_out_17_pad_type_0"), val = string("valid")]; + tensor mlp_out_17_strides_0 = const()[name = string("mlp_out_17_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_17_pad_0 = const()[name = string("mlp_out_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_17_dilations_0 = const()[name = string("mlp_out_17_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_17_groups_0 = const()[name = string("mlp_out_17_groups_0"), val = int32(1)]; + tensor mlp_out_17 = conv(dilations = mlp_out_17_dilations_0, groups = mlp_out_17_groups_0, pad = mlp_out_17_pad_0, pad_type = mlp_out_17_pad_type_0, strides = mlp_out_17_strides_0, weight = layers_c2_8_mlp_down_proj_weight_palettized, x = input_257)[name = string("mlp_out_17")]; + tensor var_6041_axes_0 = const()[name = string("op_6041_axes_0"), val = tensor([2])]; + tensor var_6041 = squeeze(axes = var_6041_axes_0, x = mlp_out_17)[name = string("op_6041")]; + tensor var_6045 = const()[name = 
string("op_6045"), val = tensor([0, 2, 1])]; + int32 var_6051 = const()[name = string("op_6051"), val = int32(-1)]; + fp16 const_103_promoted = const()[name = string("const_103_promoted"), val = fp16(-0x1p+0)]; + tensor x_175 = transpose(perm = var_6045, x = var_6041)[name = string("transpose_147")]; + tensor var_6053 = mul(x = x_175, y = const_103_promoted)[name = string("op_6053")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259 = concat(axis = var_6051, interleave = input_259_interleave_0, values = (x_175, var_6053))[name = string("input_259")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_6048_to_fp16 = const()[name = string("op_6048_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_6048_to_fp16, x = input_259)[name = string("normed_245_cast_fp16")]; + tensor var_6058_split_sizes_0 = const()[name = string("op_6058_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6058_axis_0 = const()[name = string("op_6058_axis_0"), val = int32(-1)]; + tensor var_6058_0, tensor var_6058_1 = split(axis = var_6058_axis_0, split_sizes = var_6058_split_sizes_0, x = normed_245_cast_fp16)[name = string("op_6058")]; + tensor hidden_states_83 = mul(x = var_6058_0, y = layers_c2_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_173_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_17_begin_0 = const()[name = string("per_layer_slice_17_begin_0"), val = tensor([0, 0, 5120])]; + tensor per_layer_slice_17_end_0 = const()[name = string("per_layer_slice_17_end_0"), val = tensor([1, 1, 5376])]; + tensor per_layer_slice_17_end_mask_0 = const()[name = string("per_layer_slice_17_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_17_cast_fp16 = 
slice_by_index(begin = per_layer_slice_17_begin_0, end = per_layer_slice_17_end_0, end_mask = per_layer_slice_17_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_17_cast_fp16")]; + tensor var_6086 = const()[name = string("op_6086"), val = tensor([0, 2, 1])]; + tensor input_261_axes_0 = const()[name = string("input_261_axes_0"), val = tensor([2])]; + tensor var_6087 = transpose(perm = var_6086, x = hidden_states_85_cast_fp16)[name = string("transpose_146")]; + tensor input_261 = expand_dims(axes = input_261_axes_0, x = var_6087)[name = string("input_261")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_c2_8_per_layer_input_gate_weight_palettized, x = input_261)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_6106 = const()[name = string("op_6106"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_17_axes_0 = const()[name = string("per_layer_slice_conv_17_axes_0"), val = tensor([2])]; + tensor var_6107_cast_fp16 = transpose(perm = var_6106, x = per_layer_slice_17_cast_fp16)[name = string("transpose_145")]; + tensor per_layer_slice_conv_17_cast_fp16 = expand_dims(axes = per_layer_slice_conv_17_axes_0, x = var_6107_cast_fp16)[name = 
string("per_layer_slice_conv_17_cast_fp16")]; + tensor input_263_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_17_cast_fp16)[name = string("input_263_cast_fp16")]; + string gated_53_pad_type_0 = const()[name = string("gated_53_pad_type_0"), val = string("valid")]; + tensor gated_53_strides_0 = const()[name = string("gated_53_strides_0"), val = tensor([1, 1])]; + tensor gated_53_pad_0 = const()[name = string("gated_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_53_dilations_0 = const()[name = string("gated_53_dilations_0"), val = tensor([1, 1])]; + int32 gated_53_groups_0 = const()[name = string("gated_53_groups_0"), val = int32(1)]; + tensor layers_c2_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943654784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943982528))))[name = string("layers_c2_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_53_cast_fp16 = conv(dilations = gated_53_dilations_0, groups = gated_53_groups_0, pad = gated_53_pad_0, pad_type = gated_53_pad_type_0, strides = gated_53_strides_0, weight = layers_c2_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_263_cast_fp16)[name = string("gated_53_cast_fp16")]; + tensor var_6123_axes_0 = const()[name = string("op_6123_axes_0"), val = tensor([2])]; + tensor var_6123_cast_fp16 = squeeze(axes = var_6123_axes_0, x = gated_53_cast_fp16)[name = string("op_6123_cast_fp16")]; + tensor var_6127 = const()[name = string("op_6127"), val = tensor([0, 2, 1])]; + int32 var_6133 = const()[name = string("op_6133"), val = int32(-1)]; + fp16 const_104_promoted_to_fp16 = const()[name = string("const_104_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_177_cast_fp16 = transpose(perm = var_6127, x = var_6123_cast_fp16)[name = string("transpose_144")]; + tensor var_6135_cast_fp16 = mul(x = 
x_177_cast_fp16, y = const_104_promoted_to_fp16)[name = string("op_6135_cast_fp16")]; + bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; + tensor input_265_cast_fp16 = concat(axis = var_6133, interleave = input_265_interleave_0, values = (x_177_cast_fp16, var_6135_cast_fp16))[name = string("input_265_cast_fp16")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_6130_to_fp16 = const()[name = string("op_6130_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_6130_to_fp16, x = input_265_cast_fp16)[name = string("normed_249_cast_fp16")]; + tensor var_6140_split_sizes_0 = const()[name = string("op_6140_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6140_axis_0 = const()[name = string("op_6140_axis_0"), val = int32(-1)]; + tensor var_6140_cast_fp16_0, tensor var_6140_cast_fp16_1 = split(axis = var_6140_axis_0, split_sizes = var_6140_split_sizes_0, x = normed_249_cast_fp16)[name = string("op_6140_cast_fp16")]; + tensor layers_c2_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943985152)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_6140_cast_fp16_0, y = layers_c2_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; + tensor const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = tensor([0x1.bap-2])]; + tensor x_179_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_105_promoted_to_fp16)[name = string("x_179_cast_fp16")]; + tensor var_6152_axes_0 = const()[name = 
string("op_6152_axes_0"), val = tensor([0])]; + tensor var_6152_cast_fp16 = squeeze(axes = var_6152_axes_0, x = K_sliding_out_15_cast_fp16)[name = string("op_6152_cast_fp16")]; + tensor var_6154_axes_0 = const()[name = string("op_6154_axes_0"), val = tensor([0])]; + tensor var_6154_cast_fp16 = squeeze(axes = var_6154_axes_0, x = V_sliding_out_15_cast_fp16)[name = string("op_6154_cast_fp16")]; + tensor var_6157_begin_0 = const()[name = string("op_6157_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6157_end_0 = const()[name = string("op_6157_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_6157_end_mask_0 = const()[name = string("op_6157_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6157_squeeze_mask_0 = const()[name = string("op_6157_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6157_cast_fp16 = slice_by_index(begin = var_6157_begin_0, end = var_6157_end_0, end_mask = var_6157_end_mask_0, squeeze_mask = var_6157_squeeze_mask_0, x = K_sliding_in)[name = string("op_6157_cast_fp16")]; + tensor K_sliding_slot_17_axes_0 = const()[name = string("K_sliding_slot_17_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_17_cast_fp16 = expand_dims(axes = K_sliding_slot_17_axes_0, x = var_6157_cast_fp16)[name = string("K_sliding_slot_17_cast_fp16")]; + tensor var_6162_begin_0 = const()[name = string("op_6162_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6162_end_0 = const()[name = string("op_6162_end_0"), val = tensor([9, 2, 512, 512])]; + tensor var_6162_end_mask_0 = const()[name = string("op_6162_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6162_squeeze_mask_0 = const()[name = string("op_6162_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6162_cast_fp16 = slice_by_index(begin = var_6162_begin_0, end = var_6162_end_0, end_mask = var_6162_end_mask_0, squeeze_mask = var_6162_squeeze_mask_0, x = V_sliding_in)[name = string("op_6162_cast_fp16")]; + tensor 
V_sliding_slot_17_axes_0 = const()[name = string("V_sliding_slot_17_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_17_cast_fp16 = expand_dims(axes = V_sliding_slot_17_axes_0, x = var_6162_cast_fp16)[name = string("V_sliding_slot_17_cast_fp16")]; + int32 var_6169 = const()[name = string("op_6169"), val = int32(-1)]; + fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6171_cast_fp16 = mul(x = x_179_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_6171_cast_fp16")]; + bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; + tensor input_267_cast_fp16 = concat(axis = var_6169, interleave = input_267_interleave_0, values = (x_179_cast_fp16, var_6171_cast_fp16))[name = string("input_267_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_6166_to_fp16 = const()[name = string("op_6166_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_6166_to_fp16, x = input_267_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor var_6176_split_sizes_0 = const()[name = string("op_6176_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6176_axis_0 = const()[name = string("op_6176_axis_0"), val = int32(-1)]; + tensor var_6176_cast_fp16_0, tensor var_6176_cast_fp16_1 = split(axis = var_6176_axis_0, split_sizes = var_6176_split_sizes_0, x = normed_253_cast_fp16)[name = string("op_6176_cast_fp16")]; + tensor layers_c2_9_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_9_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943990336)))]; + tensor h_55_cast_fp16 = mul(x = var_6176_cast_fp16_0, y = layers_c2_9_input_layernorm_weight_promoted_to_fp16)[name = string("h_55_cast_fp16")]; + tensor var_6182 = const()[name = 
string("op_6182"), val = tensor([0, 2, 1])]; + tensor var_6185_axes_0 = const()[name = string("op_6185_axes_0"), val = tensor([2])]; + tensor var_6183_cast_fp16 = transpose(perm = var_6182, x = h_55_cast_fp16)[name = string("transpose_143")]; + tensor var_6185_cast_fp16 = expand_dims(axes = var_6185_axes_0, x = var_6183_cast_fp16)[name = string("op_6185_cast_fp16")]; + string var_6201_pad_type_0 = const()[name = string("op_6201_pad_type_0"), val = string("valid")]; + tensor var_6201_strides_0 = const()[name = string("op_6201_strides_0"), val = tensor([1, 1])]; + tensor var_6201_pad_0 = const()[name = string("op_6201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6201_dilations_0 = const()[name = string("op_6201_dilations_0"), val = tensor([1, 1])]; + int32 var_6201_groups_0 = const()[name = string("op_6201_groups_0"), val = int32(1)]; + tensor var_6201 = conv(dilations = var_6201_dilations_0, groups = var_6201_groups_0, pad = var_6201_pad_0, pad_type = var_6201_pad_type_0, strides = var_6201_strides_0, weight = layers_c2_9_self_attn_q_proj_weight_palettized, x = var_6185_cast_fp16)[name = string("op_6201")]; + tensor var_6206 = const()[name = string("op_6206"), val = tensor([1, 8, 256, 1])]; + tensor var_6207 = reshape(shape = var_6206, x = var_6201)[name = string("op_6207")]; + tensor var_6212 = const()[name = string("op_6212"), val = tensor([0, 1, 3, 2])]; + tensor var_6222 = const()[name = string("op_6222"), val = tensor([1, 8, 256])]; + tensor var_6213 = transpose(perm = var_6212, x = var_6207)[name = string("transpose_142")]; + tensor x_181 = reshape(shape = var_6222, x = var_6213)[name = string("x_181")]; + int32 var_6228 = const()[name = string("op_6228"), val = int32(-1)]; + fp16 const_107_promoted = const()[name = string("const_107_promoted"), val = fp16(-0x1p+0)]; + tensor var_6230 = mul(x = x_181, y = const_107_promoted)[name = string("op_6230")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + 
tensor input_271 = concat(axis = var_6228, interleave = input_271_interleave_0, values = (x_181, var_6230))[name = string("input_271")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_6225_to_fp16 = const()[name = string("op_6225_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_6225_to_fp16, x = input_271)[name = string("normed_257_cast_fp16")]; + tensor var_6235_split_sizes_0 = const()[name = string("op_6235_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6235_axis_0 = const()[name = string("op_6235_axis_0"), val = int32(-1)]; + tensor var_6235_0, tensor var_6235_1 = split(axis = var_6235_axis_0, split_sizes = var_6235_split_sizes_0, x = normed_257_cast_fp16)[name = string("op_6235")]; + tensor var_6237 = mul(x = var_6235_0, y = layers_c2_9_self_attn_q_norm_weight)[name = string("op_6237")]; + tensor var_6242 = const()[name = string("op_6242"), val = tensor([1, 8, 1, 256])]; + tensor q_75 = reshape(shape = var_6242, x = var_6237)[name = string("q_75")]; + tensor var_6244_cast_fp16 = mul(x = q_75, y = cos_s)[name = string("op_6244_cast_fp16")]; + tensor var_6245_split_sizes_0 = const()[name = string("op_6245_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6245_axis_0 = const()[name = string("op_6245_axis_0"), val = int32(-1)]; + tensor var_6245_0, tensor var_6245_1 = split(axis = var_6245_axis_0, split_sizes = var_6245_split_sizes_0, x = q_75)[name = string("op_6245")]; + fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)]; + tensor var_6247 = mul(x = var_6245_1, y = const_108_promoted)[name = string("op_6247")]; + int32 var_6249 = const()[name = string("op_6249"), val = int32(-1)]; + bool var_6250_interleave_0 = const()[name = string("op_6250_interleave_0"), val = bool(false)]; + tensor var_6250 = concat(axis = var_6249, interleave = var_6250_interleave_0, values = (var_6247, 
var_6245_0))[name = string("op_6250")]; + tensor var_6251_cast_fp16 = mul(x = var_6250, y = sin_s)[name = string("op_6251_cast_fp16")]; + tensor q_79_cast_fp16 = add(x = var_6244_cast_fp16, y = var_6251_cast_fp16)[name = string("q_79_cast_fp16")]; + string var_6264_pad_type_0 = const()[name = string("op_6264_pad_type_0"), val = string("valid")]; + tensor var_6264_strides_0 = const()[name = string("op_6264_strides_0"), val = tensor([1, 1])]; + tensor var_6264_pad_0 = const()[name = string("op_6264_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6264_dilations_0 = const()[name = string("op_6264_dilations_0"), val = tensor([1, 1])]; + int32 var_6264_groups_0 = const()[name = string("op_6264_groups_0"), val = int32(1)]; + tensor var_6264 = conv(dilations = var_6264_dilations_0, groups = var_6264_groups_0, pad = var_6264_pad_0, pad_type = var_6264_pad_type_0, strides = var_6264_strides_0, weight = layers_c2_9_self_attn_k_proj_weight_palettized, x = var_6185_cast_fp16)[name = string("op_6264")]; + tensor var_6269 = const()[name = string("op_6269"), val = tensor([1, 2, 256, 1])]; + tensor var_6270 = reshape(shape = var_6269, x = var_6264)[name = string("op_6270")]; + tensor var_6275 = const()[name = string("op_6275"), val = tensor([0, 1, 3, 2])]; + string var_6292_pad_type_0 = const()[name = string("op_6292_pad_type_0"), val = string("valid")]; + tensor var_6292_strides_0 = const()[name = string("op_6292_strides_0"), val = tensor([1, 1])]; + tensor var_6292_pad_0 = const()[name = string("op_6292_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6292_dilations_0 = const()[name = string("op_6292_dilations_0"), val = tensor([1, 1])]; + int32 var_6292_groups_0 = const()[name = string("op_6292_groups_0"), val = int32(1)]; + tensor var_6292 = conv(dilations = var_6292_dilations_0, groups = var_6292_groups_0, pad = var_6292_pad_0, pad_type = var_6292_pad_type_0, strides = var_6292_strides_0, weight = layers_c2_9_self_attn_v_proj_weight_palettized, x = 
var_6185_cast_fp16)[name = string("op_6292")]; + tensor var_6297 = const()[name = string("op_6297"), val = tensor([1, 2, 256, 1])]; + tensor var_6298 = reshape(shape = var_6297, x = var_6292)[name = string("op_6298")]; + tensor var_6303 = const()[name = string("op_6303"), val = tensor([0, 1, 3, 2])]; + tensor var_6313 = const()[name = string("op_6313"), val = tensor([1, 2, 256])]; + tensor var_6276 = transpose(perm = var_6275, x = var_6270)[name = string("transpose_141")]; + tensor x_183 = reshape(shape = var_6313, x = var_6276)[name = string("x_183")]; + int32 var_6319 = const()[name = string("op_6319"), val = int32(-1)]; + fp16 const_109_promoted = const()[name = string("const_109_promoted"), val = fp16(-0x1p+0)]; + tensor var_6321 = mul(x = x_183, y = const_109_promoted)[name = string("op_6321")]; + bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; + tensor input_273 = concat(axis = var_6319, interleave = input_273_interleave_0, values = (x_183, var_6321))[name = string("input_273")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_6316_to_fp16 = const()[name = string("op_6316_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_6316_to_fp16, x = input_273)[name = string("normed_261_cast_fp16")]; + tensor var_6326_split_sizes_0 = const()[name = string("op_6326_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6326_axis_0 = const()[name = string("op_6326_axis_0"), val = int32(-1)]; + tensor var_6326_0, tensor var_6326_1 = split(axis = var_6326_axis_0, split_sizes = var_6326_split_sizes_0, x = normed_261_cast_fp16)[name = string("op_6326")]; + tensor var_6328 = mul(x = var_6326_0, y = layers_c2_9_self_attn_k_norm_weight)[name = string("op_6328")]; + tensor var_6333 = const()[name = string("op_6333"), val = tensor([1, 2, 1, 256])]; + tensor q_77 = reshape(shape = var_6333, x = var_6328)[name 
= string("q_77")]; + fp16 var_6335_promoted = const()[name = string("op_6335_promoted"), val = fp16(0x1p+1)]; + tensor var_6304 = transpose(perm = var_6303, x = var_6298)[name = string("transpose_140")]; + tensor var_6336 = pow(x = var_6304, y = var_6335_promoted)[name = string("op_6336")]; + tensor var_6341_axes_0 = const()[name = string("op_6341_axes_0"), val = tensor([-1])]; + bool var_6341_keep_dims_0 = const()[name = string("op_6341_keep_dims_0"), val = bool(true)]; + tensor var_6341 = reduce_mean(axes = var_6341_axes_0, keep_dims = var_6341_keep_dims_0, x = var_6336)[name = string("op_6341")]; + fp16 var_6343_to_fp16 = const()[name = string("op_6343_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_19_cast_fp16 = add(x = var_6341, y = var_6343_to_fp16)[name = string("mean_sq_19_cast_fp16")]; + fp32 var_6345_epsilon_0 = const()[name = string("op_6345_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6345_cast_fp16 = rsqrt(epsilon = var_6345_epsilon_0, x = mean_sq_19_cast_fp16)[name = string("op_6345_cast_fp16")]; + tensor input_277_cast_fp16 = mul(x = var_6304, y = var_6345_cast_fp16)[name = string("input_277_cast_fp16")]; + tensor var_6347_cast_fp16 = mul(x = q_77, y = cos_s)[name = string("op_6347_cast_fp16")]; + tensor var_6348_split_sizes_0 = const()[name = string("op_6348_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6348_axis_0 = const()[name = string("op_6348_axis_0"), val = int32(-1)]; + tensor var_6348_0, tensor var_6348_1 = split(axis = var_6348_axis_0, split_sizes = var_6348_split_sizes_0, x = q_77)[name = string("op_6348")]; + fp16 const_110_promoted = const()[name = string("const_110_promoted"), val = fp16(-0x1p+0)]; + tensor var_6350 = mul(x = var_6348_1, y = const_110_promoted)[name = string("op_6350")]; + int32 var_6352 = const()[name = string("op_6352"), val = int32(-1)]; + bool var_6353_interleave_0 = const()[name = string("op_6353_interleave_0"), val = bool(false)]; + tensor var_6353 = concat(axis = var_6352, interleave = 
var_6353_interleave_0, values = (var_6350, var_6348_0))[name = string("op_6353")]; + tensor var_6354_cast_fp16 = mul(x = var_6353, y = sin_s)[name = string("op_6354_cast_fp16")]; + tensor input_275_cast_fp16 = add(x = var_6347_cast_fp16, y = var_6354_cast_fp16)[name = string("input_275_cast_fp16")]; + tensor k_padded_17_pad_0 = const()[name = string("k_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string k_padded_17_mode_0 = const()[name = string("k_padded_17_mode_0"), val = string("constant")]; + fp16 const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_17_cast_fp16 = pad(constant_val = const_111_to_fp16, mode = k_padded_17_mode_0, pad = k_padded_17_pad_0, x = input_275_cast_fp16)[name = string("k_padded_17_cast_fp16")]; + tensor v_padded_17_pad_0 = const()[name = string("v_padded_17_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_17_mode_0 = const()[name = string("v_padded_17_mode_0"), val = string("constant")]; + fp16 const_112_to_fp16 = const()[name = string("const_112_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_17_cast_fp16 = pad(constant_val = const_112_to_fp16, mode = v_padded_17_mode_0, pad = v_padded_17_pad_0, x = input_277_cast_fp16)[name = string("v_padded_17_cast_fp16")]; + tensor var_6383_begin_0 = const()[name = string("op_6383_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6383_end_0 = const()[name = string("op_6383_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6383_end_mask_0 = const()[name = string("op_6383_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6383_cast_fp16 = slice_by_index(begin = var_6383_begin_0, end = var_6383_end_0, end_mask = var_6383_end_mask_0, x = K_sliding_slot_17_cast_fp16)[name = string("op_6383_cast_fp16")]; + int32 var_6390 = const()[name = string("op_6390"), val = int32(2)]; + bool K_sliding_out_17_interleave_0 = const()[name = string("K_sliding_out_17_interleave_0"), val = bool(false)]; + 
tensor K_sliding_out_17_cast_fp16 = concat(axis = var_6390, interleave = K_sliding_out_17_interleave_0, values = (var_6383_cast_fp16, k_padded_17_cast_fp16))[name = string("K_sliding_out_17_cast_fp16")]; + tensor var_6406_begin_0 = const()[name = string("op_6406_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6406_end_0 = const()[name = string("op_6406_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6406_end_mask_0 = const()[name = string("op_6406_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6406_cast_fp16 = slice_by_index(begin = var_6406_begin_0, end = var_6406_end_0, end_mask = var_6406_end_mask_0, x = V_sliding_slot_17_cast_fp16)[name = string("op_6406_cast_fp16")]; + int32 var_6413 = const()[name = string("op_6413"), val = int32(2)]; + bool V_sliding_out_17_interleave_0 = const()[name = string("V_sliding_out_17_interleave_0"), val = bool(false)]; + tensor V_sliding_out_17_cast_fp16 = concat(axis = var_6413, interleave = V_sliding_out_17_interleave_0, values = (var_6406_cast_fp16, v_padded_17_cast_fp16))[name = string("V_sliding_out_17_cast_fp16")]; + tensor K_for_attn_19_begin_0 = const()[name = string("K_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_19_end_0 = const()[name = string("K_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_19_end_mask_0 = const()[name = string("K_for_attn_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor K_for_attn_19_cast_fp16 = slice_by_index(begin = K_for_attn_19_begin_0, end = K_for_attn_19_end_0, end_mask = K_for_attn_19_end_mask_0, x = K_sliding_out_17_cast_fp16)[name = string("K_for_attn_19_cast_fp16")]; + tensor V_for_attn_19_begin_0 = const()[name = string("V_for_attn_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_19_end_0 = const()[name = string("V_for_attn_19_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_19_end_mask_0 = const()[name = string("V_for_attn_19_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor V_for_attn_19_cast_fp16 = slice_by_index(begin = V_for_attn_19_begin_0, end = V_for_attn_19_end_0, end_mask = V_for_attn_19_end_mask_0, x = V_sliding_out_17_cast_fp16)[name = string("V_for_attn_19_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_18_reps_0 = const()[name = string("tile_18_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = K_for_attn_19_cast_fp16)[name = string("transpose_139")]; + tensor tile_18_cast_fp16 = tile(reps = tile_18_reps_0, x = transpose_36_cast_fp16)[name = string("tile_18_cast_fp16")]; + tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_36, x = tile_18_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_37 = const()[name = string("concat_37"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_36_cast_fp16)[name = string("transpose_138")]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_37, x = transpose_37_cast_fp16)[name = string("reshape_37_cast_fp16")]; + tensor transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_19_reps_0 = const()[name = string("tile_19_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = V_for_attn_19_cast_fp16)[name = string("transpose_137")]; + tensor tile_19_cast_fp16 = tile(reps = tile_19_reps_0, x = transpose_38_cast_fp16)[name = string("tile_19_cast_fp16")]; + tensor concat_38 = const()[name = 
string("concat_38"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_38, x = tile_19_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_39 = const()[name = string("concat_39"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_39_cast_fp16 = transpose(perm = transpose_39_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_136")]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_39, x = transpose_39_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor V_expanded_19_perm_0 = const()[name = string("V_expanded_19_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_37_transpose_x_0 = const()[name = string("attn_weights_37_transpose_x_0"), val = bool(false)]; + bool attn_weights_37_transpose_y_0 = const()[name = string("attn_weights_37_transpose_y_0"), val = bool(false)]; + tensor transpose_93_cast_fp16 = transpose(perm = transpose_93_perm_0, x = reshape_37_cast_fp16)[name = string("transpose_135")]; + tensor attn_weights_37_cast_fp16 = matmul(transpose_x = attn_weights_37_transpose_x_0, transpose_y = attn_weights_37_transpose_y_0, x = q_79_cast_fp16, y = transpose_93_cast_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask_sliding)[name = string("x_187_cast_fp16")]; + tensor reduce_max_9_axes_0 = const()[name = string("reduce_max_9_axes_0"), val = tensor([-1])]; + bool reduce_max_9_keep_dims_0 = const()[name = string("reduce_max_9_keep_dims_0"), val = bool(true)]; + tensor reduce_max_9 = reduce_max(axes = reduce_max_9_axes_0, keep_dims = reduce_max_9_keep_dims_0, x = x_187_cast_fp16)[name = string("reduce_max_9")]; + tensor var_6454 = sub(x = x_187_cast_fp16, y = reduce_max_9)[name = string("op_6454")]; + tensor var_6460 = exp(x = var_6454)[name = string("op_6460")]; + tensor var_6470_axes_0 = 
const()[name = string("op_6470_axes_0"), val = tensor([-1])]; + bool var_6470_keep_dims_0 = const()[name = string("op_6470_keep_dims_0"), val = bool(true)]; + tensor var_6470 = reduce_sum(axes = var_6470_axes_0, keep_dims = var_6470_keep_dims_0, x = var_6460)[name = string("op_6470")]; + tensor var_6476_cast_fp16 = real_div(x = var_6460, y = var_6470)[name = string("op_6476_cast_fp16")]; + bool attn_output_55_transpose_x_0 = const()[name = string("attn_output_55_transpose_x_0"), val = bool(false)]; + bool attn_output_55_transpose_y_0 = const()[name = string("attn_output_55_transpose_y_0"), val = bool(false)]; + tensor V_expanded_19_cast_fp16 = transpose(perm = V_expanded_19_perm_0, x = reshape_39_cast_fp16)[name = string("transpose_134")]; + tensor attn_output_55_cast_fp16 = matmul(transpose_x = attn_output_55_transpose_x_0, transpose_y = attn_output_55_transpose_y_0, x = var_6476_cast_fp16, y = V_expanded_19_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_6487 = const()[name = string("op_6487"), val = tensor([0, 2, 1, 3])]; + tensor var_6494 = const()[name = string("op_6494"), val = tensor([1, 1, -1])]; + tensor var_6488_cast_fp16 = transpose(perm = var_6487, x = attn_output_55_cast_fp16)[name = string("transpose_133")]; + tensor attn_output_57_cast_fp16 = reshape(shape = var_6494, x = var_6488_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_6499 = const()[name = string("op_6499"), val = tensor([0, 2, 1])]; + string var_6515_pad_type_0 = const()[name = string("op_6515_pad_type_0"), val = string("valid")]; + int32 var_6515_groups_0 = const()[name = string("op_6515_groups_0"), val = int32(1)]; + tensor var_6515_strides_0 = const()[name = string("op_6515_strides_0"), val = tensor([1])]; + tensor var_6515_pad_0 = const()[name = string("op_6515_pad_0"), val = tensor([0, 0])]; + tensor var_6515_dilations_0 = const()[name = string("op_6515_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(943995520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946617024))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6500_cast_fp16 = transpose(perm = var_6499, x = attn_output_57_cast_fp16)[name = string("transpose_132")]; + tensor var_6515_cast_fp16 = conv(dilations = var_6515_dilations_0, groups = var_6515_groups_0, pad = var_6515_pad_0, pad_type = var_6515_pad_type_0, strides = var_6515_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6500_cast_fp16)[name = string("op_6515_cast_fp16")]; + tensor var_6519 = const()[name = string("op_6519"), val = tensor([0, 2, 1])]; + int32 var_6525 = const()[name = string("op_6525"), val = int32(-1)]; + fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_191_cast_fp16 = transpose(perm = var_6519, x = var_6515_cast_fp16)[name = string("transpose_131")]; + tensor var_6527_cast_fp16 = mul(x = x_191_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_6527_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_6525, interleave = input_281_interleave_0, values = (x_191_cast_fp16, var_6527_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_6522_to_fp16, x = input_281_cast_fp16)[name = string("normed_265_cast_fp16")]; + tensor var_6532_split_sizes_0 = const()[name = string("op_6532_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6532_axis_0 = const()[name = 
string("op_6532_axis_0"), val = int32(-1)]; + tensor var_6532_cast_fp16_0, tensor var_6532_cast_fp16_1 = split(axis = var_6532_axis_0, split_sizes = var_6532_split_sizes_0, x = normed_265_cast_fp16)[name = string("op_6532_cast_fp16")]; + tensor layers_c2_9_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_9_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946619648)))]; + tensor attn_output_59_cast_fp16 = mul(x = var_6532_cast_fp16_0, y = layers_c2_9_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_59_cast_fp16")]; + tensor x_193_cast_fp16 = add(x = x_179_cast_fp16, y = attn_output_59_cast_fp16)[name = string("x_193_cast_fp16")]; + int32 var_6541 = const()[name = string("op_6541"), val = int32(-1)]; + fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6543_cast_fp16 = mul(x = x_193_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_6543_cast_fp16")]; + bool input_283_interleave_0 = const()[name = string("input_283_interleave_0"), val = bool(false)]; + tensor input_283_cast_fp16 = concat(axis = var_6541, interleave = input_283_interleave_0, values = (x_193_cast_fp16, var_6543_cast_fp16))[name = string("input_283_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_6538_to_fp16 = const()[name = string("op_6538_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_6538_to_fp16, x = input_283_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor var_6548_split_sizes_0 = const()[name = string("op_6548_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6548_axis_0 = const()[name = string("op_6548_axis_0"), val = int32(-1)]; + tensor var_6548_cast_fp16_0, tensor var_6548_cast_fp16_1 = split(axis = 
var_6548_axis_0, split_sizes = var_6548_split_sizes_0, x = normed_269_cast_fp16)[name = string("op_6548_cast_fp16")]; + tensor layers_c2_9_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_9_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946624832)))]; + tensor h_57_cast_fp16 = mul(x = var_6548_cast_fp16_0, y = layers_c2_9_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_57_cast_fp16")]; + tensor var_6559 = const()[name = string("op_6559"), val = tensor([0, 2, 1])]; + tensor input_285_axes_0 = const()[name = string("input_285_axes_0"), val = tensor([2])]; + tensor var_6560 = transpose(perm = var_6559, x = h_57_cast_fp16)[name = string("transpose_130")]; + tensor input_285 = expand_dims(axes = input_285_axes_0, x = var_6560)[name = string("input_285")]; + string gate_37_pad_type_0 = const()[name = string("gate_37_pad_type_0"), val = string("valid")]; + tensor gate_37_strides_0 = const()[name = string("gate_37_strides_0"), val = tensor([1, 1])]; + tensor gate_37_pad_0 = const()[name = string("gate_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_37_dilations_0 = const()[name = string("gate_37_dilations_0"), val = tensor([1, 1])]; + int32 gate_37_groups_0 = const()[name = string("gate_37_groups_0"), val = int32(1)]; + tensor gate_37 = conv(dilations = gate_37_dilations_0, groups = gate_37_groups_0, pad = gate_37_pad_0, pad_type = gate_37_pad_type_0, strides = gate_37_strides_0, weight = layers_c2_9_mlp_gate_proj_weight_palettized, x = input_285)[name = string("gate_37")]; + string up_19_pad_type_0 = const()[name = string("up_19_pad_type_0"), val = string("valid")]; + tensor up_19_strides_0 = const()[name = string("up_19_strides_0"), val = tensor([1, 1])]; + tensor up_19_pad_0 = const()[name = string("up_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_19_dilations_0 = const()[name = string("up_19_dilations_0"), 
val = tensor([1, 1])]; + int32 up_19_groups_0 = const()[name = string("up_19_groups_0"), val = int32(1)]; + tensor up_19 = conv(dilations = up_19_dilations_0, groups = up_19_groups_0, pad = up_19_pad_0, pad_type = up_19_pad_type_0, strides = up_19_strides_0, weight = layers_c2_9_mlp_up_proj_weight_palettized, x = input_285)[name = string("up_19")]; + string gate_39_mode_0 = const()[name = string("gate_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_39 = gelu(mode = gate_39_mode_0, x = gate_37)[name = string("gate_39")]; + tensor input_287 = mul(x = gate_39, y = up_19)[name = string("input_287")]; + string mlp_out_19_pad_type_0 = const()[name = string("mlp_out_19_pad_type_0"), val = string("valid")]; + tensor mlp_out_19_strides_0 = const()[name = string("mlp_out_19_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_19_pad_0 = const()[name = string("mlp_out_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_19_dilations_0 = const()[name = string("mlp_out_19_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_19_groups_0 = const()[name = string("mlp_out_19_groups_0"), val = int32(1)]; + tensor mlp_out_19 = conv(dilations = mlp_out_19_dilations_0, groups = mlp_out_19_groups_0, pad = mlp_out_19_pad_0, pad_type = mlp_out_19_pad_type_0, strides = mlp_out_19_strides_0, weight = layers_c2_9_mlp_down_proj_weight_palettized, x = input_287)[name = string("mlp_out_19")]; + tensor var_6600_axes_0 = const()[name = string("op_6600_axes_0"), val = tensor([2])]; + tensor var_6600 = squeeze(axes = var_6600_axes_0, x = mlp_out_19)[name = string("op_6600")]; + tensor var_6604 = const()[name = string("op_6604"), val = tensor([0, 2, 1])]; + int32 var_6610 = const()[name = string("op_6610"), val = int32(-1)]; + fp16 const_115_promoted = const()[name = string("const_115_promoted"), val = fp16(-0x1p+0)]; + tensor x_195 = transpose(perm = var_6604, x = var_6600)[name = string("transpose_129")]; + tensor var_6612 = mul(x = x_195, y = const_115_promoted)[name = 
string("op_6612")]; + bool input_289_interleave_0 = const()[name = string("input_289_interleave_0"), val = bool(false)]; + tensor input_289 = concat(axis = var_6610, interleave = input_289_interleave_0, values = (x_195, var_6612))[name = string("input_289")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_6607_to_fp16 = const()[name = string("op_6607_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_6607_to_fp16, x = input_289)[name = string("normed_273_cast_fp16")]; + tensor var_6617_split_sizes_0 = const()[name = string("op_6617_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6617_axis_0 = const()[name = string("op_6617_axis_0"), val = int32(-1)]; + tensor var_6617_0, tensor var_6617_1 = split(axis = var_6617_axis_0, split_sizes = var_6617_split_sizes_0, x = normed_273_cast_fp16)[name = string("op_6617")]; + tensor hidden_states_93 = mul(x = var_6617_0, y = layers_c2_9_post_feedforward_layernorm_weight)[name = string("hidden_states_93")]; + tensor hidden_states_95_cast_fp16 = add(x = x_193_cast_fp16, y = hidden_states_93)[name = string("hidden_states_95_cast_fp16")]; + tensor per_layer_slice_19_begin_0 = const()[name = string("per_layer_slice_19_begin_0"), val = tensor([0, 0, 5376])]; + tensor per_layer_slice_19_end_0 = const()[name = string("per_layer_slice_19_end_0"), val = tensor([1, 1, 5632])]; + tensor per_layer_slice_19_end_mask_0 = const()[name = string("per_layer_slice_19_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_19_cast_fp16 = slice_by_index(begin = per_layer_slice_19_begin_0, end = per_layer_slice_19_end_0, end_mask = per_layer_slice_19_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_19_cast_fp16")]; + tensor var_6645 = const()[name = string("op_6645"), val = tensor([0, 2, 1])]; + tensor input_291_axes_0 = const()[name = string("input_291_axes_0"), val = tensor([2])]; 
+ tensor var_6646 = transpose(perm = var_6645, x = hidden_states_95_cast_fp16)[name = string("transpose_128")]; + tensor input_291 = expand_dims(axes = input_291_axes_0, x = var_6646)[name = string("input_291")]; + string gated_55_pad_type_0 = const()[name = string("gated_55_pad_type_0"), val = string("valid")]; + tensor gated_55_strides_0 = const()[name = string("gated_55_strides_0"), val = tensor([1, 1])]; + tensor gated_55_pad_0 = const()[name = string("gated_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_55_dilations_0 = const()[name = string("gated_55_dilations_0"), val = tensor([1, 1])]; + int32 gated_55_groups_0 = const()[name = string("gated_55_groups_0"), val = int32(1)]; + tensor gated_55 = conv(dilations = gated_55_dilations_0, groups = gated_55_groups_0, pad = gated_55_pad_0, pad_type = gated_55_pad_type_0, strides = gated_55_strides_0, weight = layers_c2_9_per_layer_input_gate_weight_palettized, x = input_291)[name = string("gated_55")]; + string gated_57_mode_0 = const()[name = string("gated_57_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_57 = gelu(mode = gated_57_mode_0, x = gated_55)[name = string("gated_57")]; + tensor var_6665 = const()[name = string("op_6665"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_19_axes_0 = const()[name = string("per_layer_slice_conv_19_axes_0"), val = tensor([2])]; + tensor var_6666_cast_fp16 = transpose(perm = var_6665, x = per_layer_slice_19_cast_fp16)[name = string("transpose_127")]; + tensor per_layer_slice_conv_19_cast_fp16 = expand_dims(axes = per_layer_slice_conv_19_axes_0, x = var_6666_cast_fp16)[name = string("per_layer_slice_conv_19_cast_fp16")]; + tensor input_293_cast_fp16 = mul(x = gated_57, y = per_layer_slice_conv_19_cast_fp16)[name = string("input_293_cast_fp16")]; + string gated_59_pad_type_0 = const()[name = string("gated_59_pad_type_0"), val = string("valid")]; + tensor gated_59_strides_0 = const()[name = string("gated_59_strides_0"), val = tensor([1, 1])]; + 
tensor gated_59_pad_0 = const()[name = string("gated_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_59_dilations_0 = const()[name = string("gated_59_dilations_0"), val = tensor([1, 1])]; + int32 gated_59_groups_0 = const()[name = string("gated_59_groups_0"), val = int32(1)]; + tensor layers_c2_9_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946630016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946957760))))[name = string("layers_c2_9_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_59_cast_fp16 = conv(dilations = gated_59_dilations_0, groups = gated_59_groups_0, pad = gated_59_pad_0, pad_type = gated_59_pad_type_0, strides = gated_59_strides_0, weight = layers_c2_9_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_293_cast_fp16)[name = string("gated_59_cast_fp16")]; + tensor var_6682_axes_0 = const()[name = string("op_6682_axes_0"), val = tensor([2])]; + tensor var_6682_cast_fp16 = squeeze(axes = var_6682_axes_0, x = gated_59_cast_fp16)[name = string("op_6682_cast_fp16")]; + tensor var_6686 = const()[name = string("op_6686"), val = tensor([0, 2, 1])]; + int32 var_6692 = const()[name = string("op_6692"), val = int32(-1)]; + fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_197_cast_fp16 = transpose(perm = var_6686, x = var_6682_cast_fp16)[name = string("transpose_126")]; + tensor var_6694_cast_fp16 = mul(x = x_197_cast_fp16, y = const_116_promoted_to_fp16)[name = string("op_6694_cast_fp16")]; + bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)]; + tensor input_295_cast_fp16 = concat(axis = var_6692, interleave = input_295_interleave_0, values = (x_197_cast_fp16, var_6694_cast_fp16))[name = string("input_295_cast_fp16")]; + tensor 
normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_6689_to_fp16 = const()[name = string("op_6689_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_6689_to_fp16, x = input_295_cast_fp16)[name = string("normed_277_cast_fp16")]; + tensor var_6699_split_sizes_0 = const()[name = string("op_6699_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6699_axis_0 = const()[name = string("op_6699_axis_0"), val = int32(-1)]; + tensor var_6699_cast_fp16_0, tensor var_6699_cast_fp16_1 = split(axis = var_6699_axis_0, split_sizes = var_6699_split_sizes_0, x = normed_277_cast_fp16)[name = string("op_6699_cast_fp16")]; + tensor layers_c2_9_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_9_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946960384)))]; + tensor hidden_states_99_cast_fp16 = mul(x = var_6699_cast_fp16_0, y = layers_c2_9_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_99_cast_fp16")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_95_cast_fp16, y = hidden_states_99_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = tensor([0x1.d8p-2])]; + tensor x_199_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_117_promoted_to_fp16)[name = string("x_199_cast_fp16")]; + tensor var_6711_axes_0 = const()[name = string("op_6711_axes_0"), val = tensor([0])]; + tensor var_6711_cast_fp16 = squeeze(axes = var_6711_axes_0, x = K_sliding_out_17_cast_fp16)[name = string("op_6711_cast_fp16")]; + tensor var_6713_axes_0 = const()[name = string("op_6713_axes_0"), val = tensor([0])]; + tensor var_6713_cast_fp16 = squeeze(axes = var_6713_axes_0, x = V_sliding_out_17_cast_fp16)[name = 
string("op_6713_cast_fp16")]; + tensor var_6716_begin_0 = const()[name = string("op_6716_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6716_end_0 = const()[name = string("op_6716_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6716_end_mask_0 = const()[name = string("op_6716_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6716_squeeze_mask_0 = const()[name = string("op_6716_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6716_cast_fp16 = slice_by_index(begin = var_6716_begin_0, end = var_6716_end_0, end_mask = var_6716_end_mask_0, squeeze_mask = var_6716_squeeze_mask_0, x = K_sliding_in)[name = string("op_6716_cast_fp16")]; + tensor K_sliding_slot_axes_0 = const()[name = string("K_sliding_slot_axes_0"), val = tensor([0])]; + tensor K_sliding_slot_cast_fp16 = expand_dims(axes = K_sliding_slot_axes_0, x = var_6716_cast_fp16)[name = string("K_sliding_slot_cast_fp16")]; + tensor var_6721_begin_0 = const()[name = string("op_6721_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6721_end_0 = const()[name = string("op_6721_end_0"), val = tensor([10, 2, 512, 512])]; + tensor var_6721_end_mask_0 = const()[name = string("op_6721_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6721_squeeze_mask_0 = const()[name = string("op_6721_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_6721_cast_fp16 = slice_by_index(begin = var_6721_begin_0, end = var_6721_end_0, end_mask = var_6721_end_mask_0, squeeze_mask = var_6721_squeeze_mask_0, x = V_sliding_in)[name = string("op_6721_cast_fp16")]; + tensor V_sliding_slot_axes_0 = const()[name = string("V_sliding_slot_axes_0"), val = tensor([0])]; + tensor V_sliding_slot_cast_fp16 = expand_dims(axes = V_sliding_slot_axes_0, x = var_6721_cast_fp16)[name = string("V_sliding_slot_cast_fp16")]; + int32 var_6728 = const()[name = string("op_6728"), val = int32(-1)]; + fp16 const_118_promoted_to_fp16 = const()[name = 
string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6730_cast_fp16 = mul(x = x_199_cast_fp16, y = const_118_promoted_to_fp16)[name = string("op_6730_cast_fp16")]; + bool input_297_interleave_0 = const()[name = string("input_297_interleave_0"), val = bool(false)]; + tensor input_297_cast_fp16 = concat(axis = var_6728, interleave = input_297_interleave_0, values = (x_199_cast_fp16, var_6730_cast_fp16))[name = string("input_297_cast_fp16")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_6725_to_fp16 = const()[name = string("op_6725_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_6725_to_fp16, x = input_297_cast_fp16)[name = string("normed_281_cast_fp16")]; + tensor var_6735_split_sizes_0 = const()[name = string("op_6735_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_6735_axis_0 = const()[name = string("op_6735_axis_0"), val = int32(-1)]; + tensor var_6735_cast_fp16_0, tensor var_6735_cast_fp16_1 = split(axis = var_6735_axis_0, split_sizes = var_6735_split_sizes_0, x = normed_281_cast_fp16)[name = string("op_6735_cast_fp16")]; + tensor layers_c2_10_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_10_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946965568)))]; + tensor h_61_cast_fp16 = mul(x = var_6735_cast_fp16_0, y = layers_c2_10_input_layernorm_weight_promoted_to_fp16)[name = string("h_61_cast_fp16")]; + tensor var_6741 = const()[name = string("op_6741"), val = tensor([0, 2, 1])]; + tensor var_6744_axes_0 = const()[name = string("op_6744_axes_0"), val = tensor([2])]; + tensor var_6742_cast_fp16 = transpose(perm = var_6741, x = h_61_cast_fp16)[name = string("transpose_125")]; + tensor var_6744_cast_fp16 = expand_dims(axes = var_6744_axes_0, x = var_6742_cast_fp16)[name = string("op_6744_cast_fp16")]; + 
string var_6760_pad_type_0 = const()[name = string("op_6760_pad_type_0"), val = string("valid")]; + tensor var_6760_strides_0 = const()[name = string("op_6760_strides_0"), val = tensor([1, 1])]; + tensor var_6760_pad_0 = const()[name = string("op_6760_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6760_dilations_0 = const()[name = string("op_6760_dilations_0"), val = tensor([1, 1])]; + int32 var_6760_groups_0 = const()[name = string("op_6760_groups_0"), val = int32(1)]; + tensor var_6760 = conv(dilations = var_6760_dilations_0, groups = var_6760_groups_0, pad = var_6760_pad_0, pad_type = var_6760_pad_type_0, strides = var_6760_strides_0, weight = layers_c2_10_self_attn_q_proj_weight_palettized, x = var_6744_cast_fp16)[name = string("op_6760")]; + tensor var_6765 = const()[name = string("op_6765"), val = tensor([1, 8, 256, 1])]; + tensor var_6766 = reshape(shape = var_6765, x = var_6760)[name = string("op_6766")]; + tensor var_6771 = const()[name = string("op_6771"), val = tensor([0, 1, 3, 2])]; + tensor var_6781 = const()[name = string("op_6781"), val = tensor([1, 8, 256])]; + tensor var_6772 = transpose(perm = var_6771, x = var_6766)[name = string("transpose_124")]; + tensor x_201 = reshape(shape = var_6781, x = var_6772)[name = string("x_201")]; + int32 var_6787 = const()[name = string("op_6787"), val = int32(-1)]; + fp16 const_119_promoted = const()[name = string("const_119_promoted"), val = fp16(-0x1p+0)]; + tensor var_6789 = mul(x = x_201, y = const_119_promoted)[name = string("op_6789")]; + bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; + tensor input_301 = concat(axis = var_6787, interleave = input_301_interleave_0, values = (x_201, var_6789))[name = string("input_301")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_6784_to_fp16 = const()[name = string("op_6784_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = 
normed_285_axes_0, epsilon = var_6784_to_fp16, x = input_301)[name = string("normed_285_cast_fp16")]; + tensor var_6794_split_sizes_0 = const()[name = string("op_6794_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6794_axis_0 = const()[name = string("op_6794_axis_0"), val = int32(-1)]; + tensor var_6794_0, tensor var_6794_1 = split(axis = var_6794_axis_0, split_sizes = var_6794_split_sizes_0, x = normed_285_cast_fp16)[name = string("op_6794")]; + tensor var_6796 = mul(x = var_6794_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_6796")]; + tensor var_6801 = const()[name = string("op_6801"), val = tensor([1, 8, 1, 256])]; + tensor q_83 = reshape(shape = var_6801, x = var_6796)[name = string("q_83")]; + tensor var_6803_cast_fp16 = mul(x = q_83, y = cos_s)[name = string("op_6803_cast_fp16")]; + tensor var_6804_split_sizes_0 = const()[name = string("op_6804_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6804_axis_0 = const()[name = string("op_6804_axis_0"), val = int32(-1)]; + tensor var_6804_0, tensor var_6804_1 = split(axis = var_6804_axis_0, split_sizes = var_6804_split_sizes_0, x = q_83)[name = string("op_6804")]; + fp16 const_120_promoted = const()[name = string("const_120_promoted"), val = fp16(-0x1p+0)]; + tensor var_6806 = mul(x = var_6804_1, y = const_120_promoted)[name = string("op_6806")]; + int32 var_6808 = const()[name = string("op_6808"), val = int32(-1)]; + bool var_6809_interleave_0 = const()[name = string("op_6809_interleave_0"), val = bool(false)]; + tensor var_6809 = concat(axis = var_6808, interleave = var_6809_interleave_0, values = (var_6806, var_6804_0))[name = string("op_6809")]; + tensor var_6810_cast_fp16 = mul(x = var_6809, y = sin_s)[name = string("op_6810_cast_fp16")]; + tensor q_87_cast_fp16 = add(x = var_6803_cast_fp16, y = var_6810_cast_fp16)[name = string("q_87_cast_fp16")]; + string var_6823_pad_type_0 = const()[name = string("op_6823_pad_type_0"), val = string("valid")]; + tensor var_6823_strides_0 = 
const()[name = string("op_6823_strides_0"), val = tensor([1, 1])]; + tensor var_6823_pad_0 = const()[name = string("op_6823_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6823_dilations_0 = const()[name = string("op_6823_dilations_0"), val = tensor([1, 1])]; + int32 var_6823_groups_0 = const()[name = string("op_6823_groups_0"), val = int32(1)]; + tensor var_6823 = conv(dilations = var_6823_dilations_0, groups = var_6823_groups_0, pad = var_6823_pad_0, pad_type = var_6823_pad_type_0, strides = var_6823_strides_0, weight = layers_c2_10_self_attn_k_proj_weight_palettized, x = var_6744_cast_fp16)[name = string("op_6823")]; + tensor var_6828 = const()[name = string("op_6828"), val = tensor([1, 2, 256, 1])]; + tensor var_6829 = reshape(shape = var_6828, x = var_6823)[name = string("op_6829")]; + tensor var_6834 = const()[name = string("op_6834"), val = tensor([0, 1, 3, 2])]; + string var_6851_pad_type_0 = const()[name = string("op_6851_pad_type_0"), val = string("valid")]; + tensor var_6851_strides_0 = const()[name = string("op_6851_strides_0"), val = tensor([1, 1])]; + tensor var_6851_pad_0 = const()[name = string("op_6851_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6851_dilations_0 = const()[name = string("op_6851_dilations_0"), val = tensor([1, 1])]; + int32 var_6851_groups_0 = const()[name = string("op_6851_groups_0"), val = int32(1)]; + tensor var_6851 = conv(dilations = var_6851_dilations_0, groups = var_6851_groups_0, pad = var_6851_pad_0, pad_type = var_6851_pad_type_0, strides = var_6851_strides_0, weight = layers_c2_10_self_attn_v_proj_weight_palettized, x = var_6744_cast_fp16)[name = string("op_6851")]; + tensor var_6856 = const()[name = string("op_6856"), val = tensor([1, 2, 256, 1])]; + tensor var_6857 = reshape(shape = var_6856, x = var_6851)[name = string("op_6857")]; + tensor var_6862 = const()[name = string("op_6862"), val = tensor([0, 1, 3, 2])]; + tensor var_6872 = const()[name = string("op_6872"), val = tensor([1, 2, 256])]; + tensor 
var_6835 = transpose(perm = var_6834, x = var_6829)[name = string("transpose_123")]; + tensor x_203 = reshape(shape = var_6872, x = var_6835)[name = string("x_203")]; + int32 var_6878 = const()[name = string("op_6878"), val = int32(-1)]; + fp16 const_121_promoted = const()[name = string("const_121_promoted"), val = fp16(-0x1p+0)]; + tensor var_6880 = mul(x = x_203, y = const_121_promoted)[name = string("op_6880")]; + bool input_303_interleave_0 = const()[name = string("input_303_interleave_0"), val = bool(false)]; + tensor input_303 = concat(axis = var_6878, interleave = input_303_interleave_0, values = (x_203, var_6880))[name = string("input_303")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_6875_to_fp16 = const()[name = string("op_6875_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_6875_to_fp16, x = input_303)[name = string("normed_289_cast_fp16")]; + tensor var_6885_split_sizes_0 = const()[name = string("op_6885_split_sizes_0"), val = tensor([256, 256])]; + int32 var_6885_axis_0 = const()[name = string("op_6885_axis_0"), val = int32(-1)]; + tensor var_6885_0, tensor var_6885_1 = split(axis = var_6885_axis_0, split_sizes = var_6885_split_sizes_0, x = normed_289_cast_fp16)[name = string("op_6885")]; + tensor var_6887 = mul(x = var_6885_0, y = layers_c2_4_self_attn_k_norm_weight)[name = string("op_6887")]; + tensor var_6892 = const()[name = string("op_6892"), val = tensor([1, 2, 1, 256])]; + tensor q_85 = reshape(shape = var_6892, x = var_6887)[name = string("q_85")]; + fp16 var_6894_promoted = const()[name = string("op_6894_promoted"), val = fp16(0x1p+1)]; + tensor var_6863 = transpose(perm = var_6862, x = var_6857)[name = string("transpose_122")]; + tensor var_6895 = pow(x = var_6863, y = var_6894_promoted)[name = string("op_6895")]; + tensor var_6900_axes_0 = const()[name = string("op_6900_axes_0"), val = tensor([-1])]; + bool 
var_6900_keep_dims_0 = const()[name = string("op_6900_keep_dims_0"), val = bool(true)]; + tensor var_6900 = reduce_mean(axes = var_6900_axes_0, keep_dims = var_6900_keep_dims_0, x = var_6895)[name = string("op_6900")]; + fp16 var_6902_to_fp16 = const()[name = string("op_6902_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_21_cast_fp16 = add(x = var_6900, y = var_6902_to_fp16)[name = string("mean_sq_21_cast_fp16")]; + fp32 var_6904_epsilon_0 = const()[name = string("op_6904_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_6904_cast_fp16 = rsqrt(epsilon = var_6904_epsilon_0, x = mean_sq_21_cast_fp16)[name = string("op_6904_cast_fp16")]; + tensor input_307_cast_fp16 = mul(x = var_6863, y = var_6904_cast_fp16)[name = string("input_307_cast_fp16")]; + tensor var_6906_cast_fp16 = mul(x = q_85, y = cos_s)[name = string("op_6906_cast_fp16")]; + tensor var_6907_split_sizes_0 = const()[name = string("op_6907_split_sizes_0"), val = tensor([128, 128])]; + int32 var_6907_axis_0 = const()[name = string("op_6907_axis_0"), val = int32(-1)]; + tensor var_6907_0, tensor var_6907_1 = split(axis = var_6907_axis_0, split_sizes = var_6907_split_sizes_0, x = q_85)[name = string("op_6907")]; + fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; + tensor var_6909 = mul(x = var_6907_1, y = const_122_promoted)[name = string("op_6909")]; + int32 var_6911 = const()[name = string("op_6911"), val = int32(-1)]; + bool var_6912_interleave_0 = const()[name = string("op_6912_interleave_0"), val = bool(false)]; + tensor var_6912 = concat(axis = var_6911, interleave = var_6912_interleave_0, values = (var_6909, var_6907_0))[name = string("op_6912")]; + tensor var_6913_cast_fp16 = mul(x = var_6912, y = sin_s)[name = string("op_6913_cast_fp16")]; + tensor input_305_cast_fp16 = add(x = var_6906_cast_fp16, y = var_6913_cast_fp16)[name = string("input_305_cast_fp16")]; + tensor k_padded_pad_0 = const()[name = string("k_padded_pad_0"), val = tensor([0, 0, 0, 
0, 0, 0, 0, 256])]; + string k_padded_mode_0 = const()[name = string("k_padded_mode_0"), val = string("constant")]; + fp16 const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = fp16(0x0p+0)]; + tensor k_padded_cast_fp16 = pad(constant_val = const_123_to_fp16, mode = k_padded_mode_0, pad = k_padded_pad_0, x = input_305_cast_fp16)[name = string("k_padded_cast_fp16")]; + tensor v_padded_pad_0 = const()[name = string("v_padded_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 0, 256])]; + string v_padded_mode_0 = const()[name = string("v_padded_mode_0"), val = string("constant")]; + fp16 const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = fp16(0x0p+0)]; + tensor v_padded_cast_fp16 = pad(constant_val = const_124_to_fp16, mode = v_padded_mode_0, pad = v_padded_pad_0, x = input_307_cast_fp16)[name = string("v_padded_cast_fp16")]; + tensor var_6942_begin_0 = const()[name = string("op_6942_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6942_end_0 = const()[name = string("op_6942_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6942_end_mask_0 = const()[name = string("op_6942_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6942_cast_fp16 = slice_by_index(begin = var_6942_begin_0, end = var_6942_end_0, end_mask = var_6942_end_mask_0, x = K_sliding_slot_cast_fp16)[name = string("op_6942_cast_fp16")]; + int32 var_6949 = const()[name = string("op_6949"), val = int32(2)]; + bool K_sliding_out_interleave_0 = const()[name = string("K_sliding_out_interleave_0"), val = bool(false)]; + tensor K_sliding_out_cast_fp16 = concat(axis = var_6949, interleave = K_sliding_out_interleave_0, values = (var_6942_cast_fp16, k_padded_cast_fp16))[name = string("K_sliding_out_cast_fp16")]; + tensor var_6965_begin_0 = const()[name = string("op_6965_begin_0"), val = tensor([0, 0, 1, 0])]; + tensor var_6965_end_0 = const()[name = string("op_6965_end_0"), val = tensor([1, 2, 512, 512])]; + tensor var_6965_end_mask_0 = const()[name = 
string("op_6965_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_6965_cast_fp16 = slice_by_index(begin = var_6965_begin_0, end = var_6965_end_0, end_mask = var_6965_end_mask_0, x = V_sliding_slot_cast_fp16)[name = string("op_6965_cast_fp16")]; + int32 var_6972 = const()[name = string("op_6972"), val = int32(2)]; + bool V_sliding_out_interleave_0 = const()[name = string("V_sliding_out_interleave_0"), val = bool(false)]; + tensor V_sliding_out_cast_fp16 = concat(axis = var_6972, interleave = V_sliding_out_interleave_0, values = (var_6965_cast_fp16, v_padded_cast_fp16))[name = string("V_sliding_out_cast_fp16")]; + tensor K_for_attn_21_begin_0 = const()[name = string("K_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor K_for_attn_21_end_0 = const()[name = string("K_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor K_for_attn_21_end_mask_0 = const()[name = string("K_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor kv13_k = slice_by_index(begin = K_for_attn_21_begin_0, end = K_for_attn_21_end_0, end_mask = K_for_attn_21_end_mask_0, x = K_sliding_out_cast_fp16)[name = string("K_for_attn_21_cast_fp16")]; + tensor V_for_attn_21_begin_0 = const()[name = string("V_for_attn_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor V_for_attn_21_end_0 = const()[name = string("V_for_attn_21_end_0"), val = tensor([1, 2, 512, 256])]; + tensor V_for_attn_21_end_mask_0 = const()[name = string("V_for_attn_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor kv13_v = slice_by_index(begin = V_for_attn_21_begin_0, end = V_for_attn_21_end_0, end_mask = V_for_attn_21_end_mask_0, x = V_sliding_out_cast_fp16)[name = string("V_for_attn_21_cast_fp16")]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_20_reps_0 = const()[name = string("tile_20_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_40_cast_fp16 = transpose(perm = 
transpose_40_perm_0, x = kv13_k)[name = string("transpose_121")]; + tensor tile_20_cast_fp16 = tile(reps = tile_20_reps_0, x = transpose_40_cast_fp16)[name = string("tile_20_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_40, x = tile_20_cast_fp16)[name = string("reshape_40_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_40_cast_fp16)[name = string("transpose_120")]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_41, x = transpose_41_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_21_reps_0 = const()[name = string("tile_21_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_42_cast_fp16 = transpose(perm = transpose_42_perm_0, x = kv13_v)[name = string("transpose_119")]; + tensor tile_21_cast_fp16 = tile(reps = tile_21_reps_0, x = transpose_42_cast_fp16)[name = string("tile_21_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_42, x = tile_21_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_43_cast_fp16 = transpose(perm = transpose_43_perm_0, x = reshape_42_cast_fp16)[name = string("transpose_118")]; + tensor reshape_43_cast_fp16 = 
reshape(shape = concat_43, x = transpose_43_cast_fp16)[name = string("reshape_43_cast_fp16")]; + tensor V_expanded_21_perm_0 = const()[name = string("V_expanded_21_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_41_transpose_x_0 = const()[name = string("attn_weights_41_transpose_x_0"), val = bool(false)]; + bool attn_weights_41_transpose_y_0 = const()[name = string("attn_weights_41_transpose_y_0"), val = bool(false)]; + tensor transpose_94_cast_fp16 = transpose(perm = transpose_94_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_117")]; + tensor attn_weights_41_cast_fp16 = matmul(transpose_x = attn_weights_41_transpose_x_0, transpose_y = attn_weights_41_transpose_y_0, x = q_87_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask_sliding)[name = string("x_207_cast_fp16")]; + tensor reduce_max_10_axes_0 = const()[name = string("reduce_max_10_axes_0"), val = tensor([-1])]; + bool reduce_max_10_keep_dims_0 = const()[name = string("reduce_max_10_keep_dims_0"), val = bool(true)]; + tensor reduce_max_10 = reduce_max(axes = reduce_max_10_axes_0, keep_dims = reduce_max_10_keep_dims_0, x = x_207_cast_fp16)[name = string("reduce_max_10")]; + tensor var_7023 = sub(x = x_207_cast_fp16, y = reduce_max_10)[name = string("op_7023")]; + tensor var_7029 = exp(x = var_7023)[name = string("op_7029")]; + tensor var_7039_axes_0 = const()[name = string("op_7039_axes_0"), val = tensor([-1])]; + bool var_7039_keep_dims_0 = const()[name = string("op_7039_keep_dims_0"), val = bool(true)]; + tensor var_7039 = reduce_sum(axes = var_7039_axes_0, keep_dims = var_7039_keep_dims_0, x = var_7029)[name = string("op_7039")]; + tensor var_7045_cast_fp16 = real_div(x = var_7029, y = var_7039)[name = string("op_7045_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool 
attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor V_expanded_21_cast_fp16 = transpose(perm = V_expanded_21_perm_0, x = reshape_43_cast_fp16)[name = string("transpose_116")]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_7045_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_7056 = const()[name = string("op_7056"), val = tensor([0, 2, 1, 3])]; + tensor var_7063 = const()[name = string("op_7063"), val = tensor([1, 1, -1])]; + tensor var_7057_cast_fp16 = transpose(perm = var_7056, x = attn_output_61_cast_fp16)[name = string("transpose_115")]; + tensor attn_output_63_cast_fp16 = reshape(shape = var_7063, x = var_7057_cast_fp16)[name = string("attn_output_63_cast_fp16")]; + tensor var_7068 = const()[name = string("op_7068"), val = tensor([0, 2, 1])]; + string var_7084_pad_type_0 = const()[name = string("op_7084_pad_type_0"), val = string("valid")]; + int32 var_7084_groups_0 = const()[name = string("op_7084_groups_0"), val = int32(1)]; + tensor var_7084_strides_0 = const()[name = string("op_7084_strides_0"), val = tensor([1])]; + tensor var_7084_pad_0 = const()[name = string("op_7084_pad_0"), val = tensor([0, 0])]; + tensor var_7084_dilations_0 = const()[name = string("op_7084_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946970752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949592256))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7069_cast_fp16 = transpose(perm = var_7068, x = attn_output_63_cast_fp16)[name = string("transpose_114")]; + tensor var_7084_cast_fp16 = conv(dilations = var_7084_dilations_0, groups = 
var_7084_groups_0, pad = var_7084_pad_0, pad_type = var_7084_pad_type_0, strides = var_7084_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_7069_cast_fp16)[name = string("op_7084_cast_fp16")]; + tensor var_7088 = const()[name = string("op_7088"), val = tensor([0, 2, 1])]; + int32 var_7094 = const()[name = string("op_7094"), val = int32(-1)]; + fp16 const_125_promoted_to_fp16 = const()[name = string("const_125_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_211_cast_fp16 = transpose(perm = var_7088, x = var_7084_cast_fp16)[name = string("transpose_113")]; + tensor var_7096_cast_fp16 = mul(x = x_211_cast_fp16, y = const_125_promoted_to_fp16)[name = string("op_7096_cast_fp16")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311_cast_fp16 = concat(axis = var_7094, interleave = input_311_interleave_0, values = (x_211_cast_fp16, var_7096_cast_fp16))[name = string("input_311_cast_fp16")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_7091_to_fp16 = const()[name = string("op_7091_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_7091_to_fp16, x = input_311_cast_fp16)[name = string("normed_293_cast_fp16")]; + tensor var_7101_split_sizes_0 = const()[name = string("op_7101_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7101_axis_0 = const()[name = string("op_7101_axis_0"), val = int32(-1)]; + tensor var_7101_cast_fp16_0, tensor var_7101_cast_fp16_1 = split(axis = var_7101_axis_0, split_sizes = var_7101_split_sizes_0, x = normed_293_cast_fp16)[name = string("op_7101_cast_fp16")]; + tensor layers_c2_10_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_10_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949594880)))]; + 
tensor attn_output_65_cast_fp16 = mul(x = var_7101_cast_fp16_0, y = layers_c2_10_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_199_cast_fp16, y = attn_output_65_cast_fp16)[name = string("x_213_cast_fp16")]; + int32 var_7110 = const()[name = string("op_7110"), val = int32(-1)]; + fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7112_cast_fp16 = mul(x = x_213_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_7112_cast_fp16")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313_cast_fp16 = concat(axis = var_7110, interleave = input_313_interleave_0, values = (x_213_cast_fp16, var_7112_cast_fp16))[name = string("input_313_cast_fp16")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_7107_to_fp16 = const()[name = string("op_7107_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_7107_to_fp16, x = input_313_cast_fp16)[name = string("normed_297_cast_fp16")]; + tensor var_7117_split_sizes_0 = const()[name = string("op_7117_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7117_axis_0 = const()[name = string("op_7117_axis_0"), val = int32(-1)]; + tensor var_7117_cast_fp16_0, tensor var_7117_cast_fp16_1 = split(axis = var_7117_axis_0, split_sizes = var_7117_split_sizes_0, x = normed_297_cast_fp16)[name = string("op_7117_cast_fp16")]; + tensor layers_c2_10_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_10_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949600064)))]; + tensor h_63_cast_fp16 = mul(x = var_7117_cast_fp16_0, y = 
layers_c2_10_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_63_cast_fp16")]; + tensor var_7128 = const()[name = string("op_7128"), val = tensor([0, 2, 1])]; + tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; + tensor var_7129 = transpose(perm = var_7128, x = h_63_cast_fp16)[name = string("transpose_112")]; + tensor input_315 = expand_dims(axes = input_315_axes_0, x = var_7129)[name = string("input_315")]; + string gate_41_pad_type_0 = const()[name = string("gate_41_pad_type_0"), val = string("valid")]; + tensor gate_41_strides_0 = const()[name = string("gate_41_strides_0"), val = tensor([1, 1])]; + tensor gate_41_pad_0 = const()[name = string("gate_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_41_dilations_0 = const()[name = string("gate_41_dilations_0"), val = tensor([1, 1])]; + int32 gate_41_groups_0 = const()[name = string("gate_41_groups_0"), val = int32(1)]; + tensor gate_41 = conv(dilations = gate_41_dilations_0, groups = gate_41_groups_0, pad = gate_41_pad_0, pad_type = gate_41_pad_type_0, strides = gate_41_strides_0, weight = layers_c2_10_mlp_gate_proj_weight_palettized, x = input_315)[name = string("gate_41")]; + string up_21_pad_type_0 = const()[name = string("up_21_pad_type_0"), val = string("valid")]; + tensor up_21_strides_0 = const()[name = string("up_21_strides_0"), val = tensor([1, 1])]; + tensor up_21_pad_0 = const()[name = string("up_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_21_dilations_0 = const()[name = string("up_21_dilations_0"), val = tensor([1, 1])]; + int32 up_21_groups_0 = const()[name = string("up_21_groups_0"), val = int32(1)]; + tensor up_21 = conv(dilations = up_21_dilations_0, groups = up_21_groups_0, pad = up_21_pad_0, pad_type = up_21_pad_type_0, strides = up_21_strides_0, weight = layers_c2_10_mlp_up_proj_weight_palettized, x = input_315)[name = string("up_21")]; + string gate_43_mode_0 = const()[name = string("gate_43_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gate_43 = gelu(mode = gate_43_mode_0, x = gate_41)[name = string("gate_43")]; + tensor input_317 = mul(x = gate_43, y = up_21)[name = string("input_317")]; + string mlp_out_21_pad_type_0 = const()[name = string("mlp_out_21_pad_type_0"), val = string("valid")]; + tensor mlp_out_21_strides_0 = const()[name = string("mlp_out_21_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_21_pad_0 = const()[name = string("mlp_out_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_21_dilations_0 = const()[name = string("mlp_out_21_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_21_groups_0 = const()[name = string("mlp_out_21_groups_0"), val = int32(1)]; + tensor mlp_out_21 = conv(dilations = mlp_out_21_dilations_0, groups = mlp_out_21_groups_0, pad = mlp_out_21_pad_0, pad_type = mlp_out_21_pad_type_0, strides = mlp_out_21_strides_0, weight = layers_c2_10_mlp_down_proj_weight_palettized, x = input_317)[name = string("mlp_out_21")]; + tensor var_7169_axes_0 = const()[name = string("op_7169_axes_0"), val = tensor([2])]; + tensor var_7169 = squeeze(axes = var_7169_axes_0, x = mlp_out_21)[name = string("op_7169")]; + tensor var_7173 = const()[name = string("op_7173"), val = tensor([0, 2, 1])]; + int32 var_7179 = const()[name = string("op_7179"), val = int32(-1)]; + fp16 const_127_promoted = const()[name = string("const_127_promoted"), val = fp16(-0x1p+0)]; + tensor x_215 = transpose(perm = var_7173, x = var_7169)[name = string("transpose_111")]; + tensor var_7181 = mul(x = x_215, y = const_127_promoted)[name = string("op_7181")]; + bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; + tensor input_319 = concat(axis = var_7179, interleave = input_319_interleave_0, values = (x_215, var_7181))[name = string("input_319")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_7176_to_fp16 = const()[name = string("op_7176_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_7176_to_fp16, x = input_319)[name = string("normed_301_cast_fp16")]; + tensor var_7186_split_sizes_0 = const()[name = string("op_7186_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7186_axis_0 = const()[name = string("op_7186_axis_0"), val = int32(-1)]; + tensor var_7186_0, tensor var_7186_1 = split(axis = var_7186_axis_0, split_sizes = var_7186_split_sizes_0, x = normed_301_cast_fp16)[name = string("op_7186")]; + tensor hidden_states_103 = mul(x = var_7186_0, y = layers_c2_10_post_feedforward_layernorm_weight)[name = string("hidden_states_103")]; + tensor hidden_states_105_cast_fp16 = add(x = x_213_cast_fp16, y = hidden_states_103)[name = string("hidden_states_105_cast_fp16")]; + tensor per_layer_slice_21_begin_0 = const()[name = string("per_layer_slice_21_begin_0"), val = tensor([0, 0, 5632])]; + tensor per_layer_slice_21_end_0 = const()[name = string("per_layer_slice_21_end_0"), val = tensor([1, 1, 5888])]; + tensor per_layer_slice_21_end_mask_0 = const()[name = string("per_layer_slice_21_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_21_cast_fp16 = slice_by_index(begin = per_layer_slice_21_begin_0, end = per_layer_slice_21_end_0, end_mask = per_layer_slice_21_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_21_cast_fp16")]; + tensor var_7214 = const()[name = string("op_7214"), val = tensor([0, 2, 1])]; + tensor input_321_axes_0 = const()[name = string("input_321_axes_0"), val = tensor([2])]; + tensor var_7215 = transpose(perm = var_7214, x = hidden_states_105_cast_fp16)[name = string("transpose_110")]; + tensor input_321 = expand_dims(axes = input_321_axes_0, x = var_7215)[name = string("input_321")]; + string gated_61_pad_type_0 = const()[name = string("gated_61_pad_type_0"), val = string("valid")]; + tensor gated_61_strides_0 = const()[name = string("gated_61_strides_0"), val = tensor([1, 1])]; + tensor 
gated_61_pad_0 = const()[name = string("gated_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_61_dilations_0 = const()[name = string("gated_61_dilations_0"), val = tensor([1, 1])]; + int32 gated_61_groups_0 = const()[name = string("gated_61_groups_0"), val = int32(1)]; + tensor gated_61 = conv(dilations = gated_61_dilations_0, groups = gated_61_groups_0, pad = gated_61_pad_0, pad_type = gated_61_pad_type_0, strides = gated_61_strides_0, weight = layers_c2_10_per_layer_input_gate_weight_palettized, x = input_321)[name = string("gated_61")]; + string gated_63_mode_0 = const()[name = string("gated_63_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_63 = gelu(mode = gated_63_mode_0, x = gated_61)[name = string("gated_63")]; + tensor var_7234 = const()[name = string("op_7234"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_21_axes_0 = const()[name = string("per_layer_slice_conv_21_axes_0"), val = tensor([2])]; + tensor var_7235_cast_fp16 = transpose(perm = var_7234, x = per_layer_slice_21_cast_fp16)[name = string("transpose_109")]; + tensor per_layer_slice_conv_21_cast_fp16 = expand_dims(axes = per_layer_slice_conv_21_axes_0, x = var_7235_cast_fp16)[name = string("per_layer_slice_conv_21_cast_fp16")]; + tensor input_323_cast_fp16 = mul(x = gated_63, y = per_layer_slice_conv_21_cast_fp16)[name = string("input_323_cast_fp16")]; + string gated_65_pad_type_0 = const()[name = string("gated_65_pad_type_0"), val = string("valid")]; + tensor gated_65_strides_0 = const()[name = string("gated_65_strides_0"), val = tensor([1, 1])]; + tensor gated_65_pad_0 = const()[name = string("gated_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_65_dilations_0 = const()[name = string("gated_65_dilations_0"), val = tensor([1, 1])]; + int32 gated_65_groups_0 = const()[name = string("gated_65_groups_0"), val = int32(1)]; + tensor layers_c2_10_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(949605248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949932992))))[name = string("layers_c2_10_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_65_cast_fp16 = conv(dilations = gated_65_dilations_0, groups = gated_65_groups_0, pad = gated_65_pad_0, pad_type = gated_65_pad_type_0, strides = gated_65_strides_0, weight = layers_c2_10_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_323_cast_fp16)[name = string("gated_65_cast_fp16")]; + tensor var_7251_axes_0 = const()[name = string("op_7251_axes_0"), val = tensor([2])]; + tensor var_7251_cast_fp16 = squeeze(axes = var_7251_axes_0, x = gated_65_cast_fp16)[name = string("op_7251_cast_fp16")]; + tensor var_7255 = const()[name = string("op_7255"), val = tensor([0, 2, 1])]; + int32 var_7261 = const()[name = string("op_7261"), val = int32(-1)]; + fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_217_cast_fp16 = transpose(perm = var_7255, x = var_7251_cast_fp16)[name = string("transpose_108")]; + tensor var_7263_cast_fp16 = mul(x = x_217_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_7263_cast_fp16")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325_cast_fp16 = concat(axis = var_7261, interleave = input_325_interleave_0, values = (x_217_cast_fp16, var_7263_cast_fp16))[name = string("input_325_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_7258_to_fp16 = const()[name = string("op_7258_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_7258_to_fp16, x = input_325_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor var_7268_split_sizes_0 = const()[name = 
string("op_7268_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7268_axis_0 = const()[name = string("op_7268_axis_0"), val = int32(-1)]; + tensor var_7268_cast_fp16_0, tensor var_7268_cast_fp16_1 = split(axis = var_7268_axis_0, split_sizes = var_7268_split_sizes_0, x = normed_305_cast_fp16)[name = string("op_7268_cast_fp16")]; + tensor layers_c2_10_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_10_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949935616)))]; + tensor hidden_states_109_cast_fp16 = mul(x = var_7268_cast_fp16_0, y = layers_c2_10_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_109_cast_fp16")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_105_cast_fp16, y = hidden_states_109_cast_fp16)[name = string("hidden_states_111_cast_fp16")]; + tensor const_129_promoted_to_fp16 = const()[name = string("const_129_promoted_to_fp16"), val = tensor([0x1.42p-3])]; + tensor x_219_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_129_promoted_to_fp16)[name = string("x_219_cast_fp16")]; + tensor var_7280_axes_0 = const()[name = string("op_7280_axes_0"), val = tensor([0])]; + tensor var_7280_cast_fp16 = squeeze(axes = var_7280_axes_0, x = K_sliding_out_cast_fp16)[name = string("op_7280_cast_fp16")]; + tensor var_7282_axes_0 = const()[name = string("op_7282_axes_0"), val = tensor([0])]; + tensor var_7282_cast_fp16 = squeeze(axes = var_7282_axes_0, x = V_sliding_out_cast_fp16)[name = string("op_7282_cast_fp16")]; + tensor var_7285_begin_0 = const()[name = string("op_7285_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_7285_end_0 = const()[name = string("op_7285_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_7285_end_mask_0 = const()[name = string("op_7285_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7285_squeeze_mask_0 = const()[name = 
string("op_7285_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_7285_cast_fp16 = slice_by_index(begin = var_7285_begin_0, end = var_7285_end_0, end_mask = var_7285_end_mask_0, squeeze_mask = var_7285_squeeze_mask_0, x = K_full_in)[name = string("op_7285_cast_fp16")]; + tensor K_full_slot_axes_0 = const()[name = string("K_full_slot_axes_0"), val = tensor([0])]; + tensor K_full_slot_cast_fp16 = expand_dims(axes = K_full_slot_axes_0, x = var_7285_cast_fp16)[name = string("K_full_slot_cast_fp16")]; + tensor var_7290_begin_0 = const()[name = string("op_7290_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_7290_end_0 = const()[name = string("op_7290_end_0"), val = tensor([2, 2, 2048, 512])]; + tensor var_7290_end_mask_0 = const()[name = string("op_7290_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7290_squeeze_mask_0 = const()[name = string("op_7290_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor var_7290_cast_fp16 = slice_by_index(begin = var_7290_begin_0, end = var_7290_end_0, end_mask = var_7290_end_mask_0, squeeze_mask = var_7290_squeeze_mask_0, x = V_full_in)[name = string("op_7290_cast_fp16")]; + tensor V_full_slot_axes_0 = const()[name = string("V_full_slot_axes_0"), val = tensor([0])]; + tensor V_full_slot_cast_fp16 = expand_dims(axes = V_full_slot_axes_0, x = var_7290_cast_fp16)[name = string("V_full_slot_cast_fp16")]; + int32 var_7297 = const()[name = string("op_7297"), val = int32(-1)]; + fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7299_cast_fp16 = mul(x = x_219_cast_fp16, y = const_130_promoted_to_fp16)[name = string("op_7299_cast_fp16")]; + bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; + tensor input_327_cast_fp16 = concat(axis = var_7297, interleave = input_327_interleave_0, values = (x_219_cast_fp16, var_7299_cast_fp16))[name = 
string("input_327_cast_fp16")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_7294_to_fp16 = const()[name = string("op_7294_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_7294_to_fp16, x = input_327_cast_fp16)[name = string("normed_309_cast_fp16")]; + tensor var_7304_split_sizes_0 = const()[name = string("op_7304_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7304_axis_0 = const()[name = string("op_7304_axis_0"), val = int32(-1)]; + tensor var_7304_cast_fp16_0, tensor var_7304_cast_fp16_1 = split(axis = var_7304_axis_0, split_sizes = var_7304_split_sizes_0, x = normed_309_cast_fp16)[name = string("op_7304_cast_fp16")]; + tensor layers_c2_11_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_11_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949940800)))]; + tensor h_67_cast_fp16 = mul(x = var_7304_cast_fp16_0, y = layers_c2_11_input_layernorm_weight_promoted_to_fp16)[name = string("h_67_cast_fp16")]; + tensor var_7310 = const()[name = string("op_7310"), val = tensor([0, 2, 1])]; + tensor var_7313_axes_0 = const()[name = string("op_7313_axes_0"), val = tensor([2])]; + tensor var_7311_cast_fp16 = transpose(perm = var_7310, x = h_67_cast_fp16)[name = string("transpose_107")]; + tensor var_7313_cast_fp16 = expand_dims(axes = var_7313_axes_0, x = var_7311_cast_fp16)[name = string("op_7313_cast_fp16")]; + string var_7329_pad_type_0 = const()[name = string("op_7329_pad_type_0"), val = string("valid")]; + tensor var_7329_strides_0 = const()[name = string("op_7329_strides_0"), val = tensor([1, 1])]; + tensor var_7329_pad_0 = const()[name = string("op_7329_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7329_dilations_0 = const()[name = string("op_7329_dilations_0"), val = tensor([1, 1])]; + int32 var_7329_groups_0 = const()[name 
= string("op_7329_groups_0"), val = int32(1)]; + tensor var_7329 = conv(dilations = var_7329_dilations_0, groups = var_7329_groups_0, pad = var_7329_pad_0, pad_type = var_7329_pad_type_0, strides = var_7329_strides_0, weight = layers_c2_11_self_attn_q_proj_weight_palettized, x = var_7313_cast_fp16)[name = string("op_7329")]; + tensor var_7334 = const()[name = string("op_7334"), val = tensor([1, 8, 512, 1])]; + tensor var_7335 = reshape(shape = var_7334, x = var_7329)[name = string("op_7335")]; + tensor var_7340 = const()[name = string("op_7340"), val = tensor([0, 1, 3, 2])]; + tensor var_7350 = const()[name = string("op_7350"), val = tensor([1, 8, 512])]; + tensor var_7341 = transpose(perm = var_7340, x = var_7335)[name = string("transpose_106")]; + tensor x_221 = reshape(shape = var_7350, x = var_7341)[name = string("x_221")]; + int32 var_7356 = const()[name = string("op_7356"), val = int32(-1)]; + fp16 const_131_promoted = const()[name = string("const_131_promoted"), val = fp16(-0x1p+0)]; + tensor var_7358 = mul(x = x_221, y = const_131_promoted)[name = string("op_7358")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331 = concat(axis = var_7356, interleave = input_331_interleave_0, values = (x_221, var_7358))[name = string("input_331")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_7353_to_fp16 = const()[name = string("op_7353_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_7353_to_fp16, x = input_331)[name = string("normed_313_cast_fp16")]; + tensor var_7363_split_sizes_0 = const()[name = string("op_7363_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7363_axis_0 = const()[name = string("op_7363_axis_0"), val = int32(-1)]; + tensor var_7363_0, tensor var_7363_1 = split(axis = var_7363_axis_0, split_sizes = var_7363_split_sizes_0, x = 
normed_313_cast_fp16)[name = string("op_7363")]; + tensor var_7365 = mul(x = var_7363_0, y = layers_c2_11_self_attn_q_norm_weight)[name = string("op_7365")]; + tensor var_7370 = const()[name = string("op_7370"), val = tensor([1, 8, 1, 512])]; + tensor q_91 = reshape(shape = var_7370, x = var_7365)[name = string("q_91")]; + tensor var_7372_cast_fp16 = mul(x = q_91, y = cos_f)[name = string("op_7372_cast_fp16")]; + tensor var_7373_split_sizes_0 = const()[name = string("op_7373_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7373_axis_0 = const()[name = string("op_7373_axis_0"), val = int32(-1)]; + tensor var_7373_0, tensor var_7373_1 = split(axis = var_7373_axis_0, split_sizes = var_7373_split_sizes_0, x = q_91)[name = string("op_7373")]; + fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; + tensor var_7375 = mul(x = var_7373_1, y = const_132_promoted)[name = string("op_7375")]; + int32 var_7377 = const()[name = string("op_7377"), val = int32(-1)]; + bool var_7378_interleave_0 = const()[name = string("op_7378_interleave_0"), val = bool(false)]; + tensor var_7378 = concat(axis = var_7377, interleave = var_7378_interleave_0, values = (var_7375, var_7373_0))[name = string("op_7378")]; + tensor var_7379_cast_fp16 = mul(x = var_7378, y = sin_f)[name = string("op_7379_cast_fp16")]; + tensor q_95_cast_fp16 = add(x = var_7372_cast_fp16, y = var_7379_cast_fp16)[name = string("q_95_cast_fp16")]; + string var_7392_pad_type_0 = const()[name = string("op_7392_pad_type_0"), val = string("valid")]; + tensor var_7392_strides_0 = const()[name = string("op_7392_strides_0"), val = tensor([1, 1])]; + tensor var_7392_pad_0 = const()[name = string("op_7392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7392_dilations_0 = const()[name = string("op_7392_dilations_0"), val = tensor([1, 1])]; + int32 var_7392_groups_0 = const()[name = string("op_7392_groups_0"), val = int32(1)]; + tensor var_7392 = conv(dilations = var_7392_dilations_0, 
groups = var_7392_groups_0, pad = var_7392_pad_0, pad_type = var_7392_pad_type_0, strides = var_7392_strides_0, weight = layers_c2_11_self_attn_k_proj_weight_palettized, x = var_7313_cast_fp16)[name = string("op_7392")]; + tensor var_7397 = const()[name = string("op_7397"), val = tensor([1, 2, 512, 1])]; + tensor var_7398 = reshape(shape = var_7397, x = var_7392)[name = string("op_7398")]; + tensor var_7403 = const()[name = string("op_7403"), val = tensor([0, 1, 3, 2])]; + string var_7420_pad_type_0 = const()[name = string("op_7420_pad_type_0"), val = string("valid")]; + tensor var_7420_strides_0 = const()[name = string("op_7420_strides_0"), val = tensor([1, 1])]; + tensor var_7420_pad_0 = const()[name = string("op_7420_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7420_dilations_0 = const()[name = string("op_7420_dilations_0"), val = tensor([1, 1])]; + int32 var_7420_groups_0 = const()[name = string("op_7420_groups_0"), val = int32(1)]; + tensor var_7420 = conv(dilations = var_7420_dilations_0, groups = var_7420_groups_0, pad = var_7420_pad_0, pad_type = var_7420_pad_type_0, strides = var_7420_strides_0, weight = layers_c2_11_self_attn_v_proj_weight_palettized, x = var_7313_cast_fp16)[name = string("op_7420")]; + tensor var_7425 = const()[name = string("op_7425"), val = tensor([1, 2, 512, 1])]; + tensor var_7426 = reshape(shape = var_7425, x = var_7420)[name = string("op_7426")]; + tensor var_7431 = const()[name = string("op_7431"), val = tensor([0, 1, 3, 2])]; + tensor var_7441 = const()[name = string("op_7441"), val = tensor([1, 2, 512])]; + tensor var_7404 = transpose(perm = var_7403, x = var_7398)[name = string("transpose_105")]; + tensor x_223 = reshape(shape = var_7441, x = var_7404)[name = string("x_223")]; + int32 var_7447 = const()[name = string("op_7447"), val = int32(-1)]; + fp16 const_133_promoted = const()[name = string("const_133_promoted"), val = fp16(-0x1p+0)]; + tensor var_7449 = mul(x = x_223, y = const_133_promoted)[name = 
string("op_7449")]; + bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; + tensor input_333 = concat(axis = var_7447, interleave = input_333_interleave_0, values = (x_223, var_7449))[name = string("input_333")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_7444_to_fp16 = const()[name = string("op_7444_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_7444_to_fp16, x = input_333)[name = string("normed_317_cast_fp16")]; + tensor var_7454_split_sizes_0 = const()[name = string("op_7454_split_sizes_0"), val = tensor([512, 512])]; + int32 var_7454_axis_0 = const()[name = string("op_7454_axis_0"), val = int32(-1)]; + tensor var_7454_0, tensor var_7454_1 = split(axis = var_7454_axis_0, split_sizes = var_7454_split_sizes_0, x = normed_317_cast_fp16)[name = string("op_7454")]; + tensor var_7456 = mul(x = var_7454_0, y = layers_c2_11_self_attn_k_norm_weight)[name = string("op_7456")]; + tensor var_7461 = const()[name = string("op_7461"), val = tensor([1, 2, 1, 512])]; + tensor q_93 = reshape(shape = var_7461, x = var_7456)[name = string("q_93")]; + fp16 var_7463_promoted = const()[name = string("op_7463_promoted"), val = fp16(0x1p+1)]; + tensor var_7432 = transpose(perm = var_7431, x = var_7426)[name = string("transpose_104")]; + tensor var_7464 = pow(x = var_7432, y = var_7463_promoted)[name = string("op_7464")]; + tensor var_7469_axes_0 = const()[name = string("op_7469_axes_0"), val = tensor([-1])]; + bool var_7469_keep_dims_0 = const()[name = string("op_7469_keep_dims_0"), val = bool(true)]; + tensor var_7469 = reduce_mean(axes = var_7469_axes_0, keep_dims = var_7469_keep_dims_0, x = var_7464)[name = string("op_7469")]; + fp16 var_7471_to_fp16 = const()[name = string("op_7471_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_sq_cast_fp16 = add(x = var_7469, y = var_7471_to_fp16)[name = 
string("mean_sq_cast_fp16")]; + fp32 var_7473_epsilon_0 = const()[name = string("op_7473_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_7473_cast_fp16 = rsqrt(epsilon = var_7473_epsilon_0, x = mean_sq_cast_fp16)[name = string("op_7473_cast_fp16")]; + tensor v_cast_fp16 = mul(x = var_7432, y = var_7473_cast_fp16)[name = string("v_cast_fp16")]; + tensor var_7475_cast_fp16 = mul(x = q_93, y = cos_f)[name = string("op_7475_cast_fp16")]; + tensor var_7476_split_sizes_0 = const()[name = string("op_7476_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7476_axis_0 = const()[name = string("op_7476_axis_0"), val = int32(-1)]; + tensor var_7476_0, tensor var_7476_1 = split(axis = var_7476_axis_0, split_sizes = var_7476_split_sizes_0, x = q_93)[name = string("op_7476")]; + fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; + tensor var_7478 = mul(x = var_7476_1, y = const_134_promoted)[name = string("op_7478")]; + int32 var_7480 = const()[name = string("op_7480"), val = int32(-1)]; + bool var_7481_interleave_0 = const()[name = string("op_7481_interleave_0"), val = bool(false)]; + tensor var_7481 = concat(axis = var_7480, interleave = var_7481_interleave_0, values = (var_7478, var_7476_0))[name = string("op_7481")]; + tensor var_7482_cast_fp16 = mul(x = var_7481, y = sin_f)[name = string("op_7482_cast_fp16")]; + tensor k_cast_fp16 = add(x = var_7475_cast_fp16, y = var_7482_cast_fp16)[name = string("k_cast_fp16")]; + tensor var_7488_cast_fp16 = mul(x = K_full_slot_cast_fp16, y = var_4165_cast_fp16)[name = string("op_7488_cast_fp16")]; + tensor var_7489_reps_0 = const()[name = string("op_7489_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_7489_cast_fp16 = tile(reps = var_7489_reps_0, x = k_cast_fp16)[name = string("op_7489_cast_fp16")]; + tensor var_7490_cast_fp16 = mul(x = var_7489_cast_fp16, y = update_mask)[name = string("op_7490_cast_fp16")]; + tensor kv14_k = add(x = var_7488_cast_fp16, y = 
var_7490_cast_fp16)[name = string("K_full_out_cast_fp16")]; + tensor var_7496_cast_fp16 = mul(x = V_full_slot_cast_fp16, y = var_4165_cast_fp16)[name = string("op_7496_cast_fp16")]; + tensor var_7497_reps_0 = const()[name = string("op_7497_reps_0"), val = tensor([1, 1, 2048, 1])]; + tensor var_7497_cast_fp16 = tile(reps = var_7497_reps_0, x = v_cast_fp16)[name = string("op_7497_cast_fp16")]; + tensor var_7498_cast_fp16 = mul(x = var_7497_cast_fp16, y = update_mask)[name = string("op_7498_cast_fp16")]; + tensor kv14_v = add(x = var_7496_cast_fp16, y = var_7498_cast_fp16)[name = string("V_full_out_cast_fp16")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_22_reps_0 = const()[name = string("tile_22_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_44_cast_fp16 = transpose(perm = transpose_44_perm_0, x = kv14_k)[name = string("transpose_103")]; + tensor tile_22_cast_fp16 = tile(reps = tile_22_reps_0, x = transpose_44_cast_fp16)[name = string("tile_22_cast_fp16")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_44, x = tile_22_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_45 = const()[name = string("concat_45"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_102")]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_45, x = transpose_45_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_23_reps_0 = const()[name = 
string("tile_23_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_46_cast_fp16 = transpose(perm = transpose_46_perm_0, x = kv14_v)[name = string("transpose_101")]; + tensor tile_23_cast_fp16 = tile(reps = tile_23_reps_0, x = transpose_46_cast_fp16)[name = string("tile_23_cast_fp16")]; + tensor concat_46 = const()[name = string("concat_46"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_46, x = tile_23_cast_fp16)[name = string("reshape_46_cast_fp16")]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_46_cast_fp16)[name = string("transpose_100")]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_47, x = transpose_47_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor V_expanded_23_perm_0 = const()[name = string("V_expanded_23_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_45_transpose_x_0 = const()[name = string("attn_weights_45_transpose_x_0"), val = bool(false)]; + bool attn_weights_45_transpose_y_0 = const()[name = string("attn_weights_45_transpose_y_0"), val = bool(false)]; + tensor transpose_95_cast_fp16 = transpose(perm = transpose_95_perm_0, x = reshape_45_cast_fp16)[name = string("transpose_99")]; + tensor attn_weights_45_cast_fp16 = matmul(transpose_x = attn_weights_45_transpose_x_0, transpose_y = attn_weights_45_transpose_y_0, x = q_95_cast_fp16, y = transpose_95_cast_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor x_227_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask_full)[name = string("x_227_cast_fp16")]; + tensor reduce_max_11_axes_0 = const()[name = string("reduce_max_11_axes_0"), val = tensor([-1])]; + bool reduce_max_11_keep_dims_0 = const()[name = string("reduce_max_11_keep_dims_0"), val = bool(true)]; + tensor 
reduce_max_11 = reduce_max(axes = reduce_max_11_axes_0, keep_dims = reduce_max_11_keep_dims_0, x = x_227_cast_fp16)[name = string("reduce_max_11")]; + tensor var_7550 = sub(x = x_227_cast_fp16, y = reduce_max_11)[name = string("op_7550")]; + tensor var_7556 = exp(x = var_7550)[name = string("op_7556")]; + tensor var_7566_axes_0 = const()[name = string("op_7566_axes_0"), val = tensor([-1])]; + bool var_7566_keep_dims_0 = const()[name = string("op_7566_keep_dims_0"), val = bool(true)]; + tensor var_7566 = reduce_sum(axes = var_7566_axes_0, keep_dims = var_7566_keep_dims_0, x = var_7556)[name = string("op_7566")]; + tensor var_7572_cast_fp16 = real_div(x = var_7556, y = var_7566)[name = string("op_7572_cast_fp16")]; + bool attn_output_67_transpose_x_0 = const()[name = string("attn_output_67_transpose_x_0"), val = bool(false)]; + bool attn_output_67_transpose_y_0 = const()[name = string("attn_output_67_transpose_y_0"), val = bool(false)]; + tensor V_expanded_23_cast_fp16 = transpose(perm = V_expanded_23_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_98")]; + tensor attn_output_67_cast_fp16 = matmul(transpose_x = attn_output_67_transpose_x_0, transpose_y = attn_output_67_transpose_y_0, x = var_7572_cast_fp16, y = V_expanded_23_cast_fp16)[name = string("attn_output_67_cast_fp16")]; + tensor var_7583 = const()[name = string("op_7583"), val = tensor([0, 2, 1, 3])]; + tensor var_7590 = const()[name = string("op_7590"), val = tensor([1, 1, -1])]; + tensor var_7584_cast_fp16 = transpose(perm = var_7583, x = attn_output_67_cast_fp16)[name = string("transpose_97")]; + tensor attn_output_69_cast_fp16 = reshape(shape = var_7590, x = var_7584_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_7595 = const()[name = string("op_7595"), val = tensor([0, 2, 1])]; + string var_7611_pad_type_0 = const()[name = string("op_7611_pad_type_0"), val = string("valid")]; + int32 var_7611_groups_0 = const()[name = string("op_7611_groups_0"), val = int32(1)]; + tensor 
var_7611_strides_0 = const()[name = string("op_7611_strides_0"), val = tensor([1])]; + tensor var_7611_pad_0 = const()[name = string("op_7611_pad_0"), val = tensor([0, 0])]; + tensor var_7611_dilations_0 = const()[name = string("op_7611_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(949945984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955188928))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7596_cast_fp16 = transpose(perm = var_7595, x = attn_output_69_cast_fp16)[name = string("transpose_96")]; + tensor var_7611_cast_fp16 = conv(dilations = var_7611_dilations_0, groups = var_7611_groups_0, pad = var_7611_pad_0, pad_type = var_7611_pad_type_0, strides = var_7611_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7596_cast_fp16)[name = string("op_7611_cast_fp16")]; + tensor var_7615 = const()[name = string("op_7615"), val = tensor([0, 2, 1])]; + int32 var_7621 = const()[name = string("op_7621"), val = int32(-1)]; + fp16 const_135_promoted_to_fp16 = const()[name = string("const_135_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_231_cast_fp16 = transpose(perm = var_7615, x = var_7611_cast_fp16)[name = string("transpose_95")]; + tensor var_7623_cast_fp16 = mul(x = x_231_cast_fp16, y = const_135_promoted_to_fp16)[name = string("op_7623_cast_fp16")]; + bool input_337_interleave_0 = const()[name = string("input_337_interleave_0"), val = bool(false)]; + tensor input_337_cast_fp16 = concat(axis = var_7621, interleave = input_337_interleave_0, values = (x_231_cast_fp16, var_7623_cast_fp16))[name = string("input_337_cast_fp16")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_7618_to_fp16 = const()[name = string("op_7618_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_7618_to_fp16, x = input_337_cast_fp16)[name = string("normed_321_cast_fp16")]; + tensor var_7628_split_sizes_0 = const()[name = string("op_7628_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7628_axis_0 = const()[name = string("op_7628_axis_0"), val = int32(-1)]; + tensor var_7628_cast_fp16_0, tensor var_7628_cast_fp16_1 = split(axis = var_7628_axis_0, split_sizes = var_7628_split_sizes_0, x = normed_321_cast_fp16)[name = string("op_7628_cast_fp16")]; + tensor layers_c2_11_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_11_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955191552)))]; + tensor attn_output_71_cast_fp16 = mul(x = var_7628_cast_fp16_0, y = layers_c2_11_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_71_cast_fp16")]; + tensor x_233_cast_fp16 = add(x = x_219_cast_fp16, y = attn_output_71_cast_fp16)[name = string("x_233_cast_fp16")]; + int32 var_7637 = const()[name = string("op_7637"), val = int32(-1)]; + fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7639_cast_fp16 = mul(x = x_233_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_7639_cast_fp16")]; + bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; + tensor input_339_cast_fp16 = concat(axis = var_7637, interleave = input_339_interleave_0, values = (x_233_cast_fp16, var_7639_cast_fp16))[name = string("input_339_cast_fp16")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_7634_to_fp16 = const()[name = string("op_7634_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_7634_to_fp16, x = 
input_339_cast_fp16)[name = string("normed_325_cast_fp16")]; + tensor var_7644_split_sizes_0 = const()[name = string("op_7644_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7644_axis_0 = const()[name = string("op_7644_axis_0"), val = int32(-1)]; + tensor var_7644_cast_fp16_0, tensor var_7644_cast_fp16_1 = split(axis = var_7644_axis_0, split_sizes = var_7644_split_sizes_0, x = normed_325_cast_fp16)[name = string("op_7644_cast_fp16")]; + tensor layers_c2_11_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c2_11_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955196736)))]; + tensor h_69_cast_fp16 = mul(x = var_7644_cast_fp16_0, y = layers_c2_11_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_69_cast_fp16")]; + tensor var_7655 = const()[name = string("op_7655"), val = tensor([0, 2, 1])]; + tensor input_341_axes_0 = const()[name = string("input_341_axes_0"), val = tensor([2])]; + tensor var_7656 = transpose(perm = var_7655, x = h_69_cast_fp16)[name = string("transpose_94")]; + tensor input_341 = expand_dims(axes = input_341_axes_0, x = var_7656)[name = string("input_341")]; + string gate_45_pad_type_0 = const()[name = string("gate_45_pad_type_0"), val = string("valid")]; + tensor gate_45_strides_0 = const()[name = string("gate_45_strides_0"), val = tensor([1, 1])]; + tensor gate_45_pad_0 = const()[name = string("gate_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_45_dilations_0 = const()[name = string("gate_45_dilations_0"), val = tensor([1, 1])]; + int32 gate_45_groups_0 = const()[name = string("gate_45_groups_0"), val = int32(1)]; + tensor gate_45 = conv(dilations = gate_45_dilations_0, groups = gate_45_groups_0, pad = gate_45_pad_0, pad_type = gate_45_pad_type_0, strides = gate_45_strides_0, weight = layers_c2_11_mlp_gate_proj_weight_palettized, x = input_341)[name = string("gate_45")]; + string 
up_23_pad_type_0 = const()[name = string("up_23_pad_type_0"), val = string("valid")]; + tensor up_23_strides_0 = const()[name = string("up_23_strides_0"), val = tensor([1, 1])]; + tensor up_23_pad_0 = const()[name = string("up_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_23_dilations_0 = const()[name = string("up_23_dilations_0"), val = tensor([1, 1])]; + int32 up_23_groups_0 = const()[name = string("up_23_groups_0"), val = int32(1)]; + tensor up_23 = conv(dilations = up_23_dilations_0, groups = up_23_groups_0, pad = up_23_pad_0, pad_type = up_23_pad_type_0, strides = up_23_strides_0, weight = layers_c2_11_mlp_up_proj_weight_palettized, x = input_341)[name = string("up_23")]; + string gate_47_mode_0 = const()[name = string("gate_47_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_47 = gelu(mode = gate_47_mode_0, x = gate_45)[name = string("gate_47")]; + tensor input_343 = mul(x = gate_47, y = up_23)[name = string("input_343")]; + string mlp_out_23_pad_type_0 = const()[name = string("mlp_out_23_pad_type_0"), val = string("valid")]; + tensor mlp_out_23_strides_0 = const()[name = string("mlp_out_23_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_23_pad_0 = const()[name = string("mlp_out_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_23_dilations_0 = const()[name = string("mlp_out_23_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_23_groups_0 = const()[name = string("mlp_out_23_groups_0"), val = int32(1)]; + tensor mlp_out_23 = conv(dilations = mlp_out_23_dilations_0, groups = mlp_out_23_groups_0, pad = mlp_out_23_pad_0, pad_type = mlp_out_23_pad_type_0, strides = mlp_out_23_strides_0, weight = layers_c2_11_mlp_down_proj_weight_palettized, x = input_343)[name = string("mlp_out_23")]; + tensor var_7696_axes_0 = const()[name = string("op_7696_axes_0"), val = tensor([2])]; + tensor var_7696 = squeeze(axes = var_7696_axes_0, x = mlp_out_23)[name = string("op_7696")]; + tensor var_7700 = const()[name = string("op_7700"), val = 
tensor([0, 2, 1])]; + int32 var_7706 = const()[name = string("op_7706"), val = int32(-1)]; + fp16 const_137_promoted = const()[name = string("const_137_promoted"), val = fp16(-0x1p+0)]; + tensor x_235 = transpose(perm = var_7700, x = var_7696)[name = string("transpose_93")]; + tensor var_7708 = mul(x = x_235, y = const_137_promoted)[name = string("op_7708")]; + bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; + tensor input_345 = concat(axis = var_7706, interleave = input_345_interleave_0, values = (x_235, var_7708))[name = string("input_345")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_7703_to_fp16 = const()[name = string("op_7703_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_7703_to_fp16, x = input_345)[name = string("normed_329_cast_fp16")]; + tensor var_7713_split_sizes_0 = const()[name = string("op_7713_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7713_axis_0 = const()[name = string("op_7713_axis_0"), val = int32(-1)]; + tensor var_7713_0, tensor var_7713_1 = split(axis = var_7713_axis_0, split_sizes = var_7713_split_sizes_0, x = normed_329_cast_fp16)[name = string("op_7713")]; + tensor hidden_states_113 = mul(x = var_7713_0, y = layers_c2_11_post_feedforward_layernorm_weight)[name = string("hidden_states_113")]; + tensor hidden_states_115_cast_fp16 = add(x = x_233_cast_fp16, y = hidden_states_113)[name = string("hidden_states_115_cast_fp16")]; + tensor per_layer_slice_23_begin_0 = const()[name = string("per_layer_slice_23_begin_0"), val = tensor([0, 0, 5888])]; + tensor per_layer_slice_23_end_0 = const()[name = string("per_layer_slice_23_end_0"), val = tensor([1, 1, 6144])]; + tensor per_layer_slice_23_end_mask_0 = const()[name = string("per_layer_slice_23_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_23_cast_fp16 = slice_by_index(begin 
= per_layer_slice_23_begin_0, end = per_layer_slice_23_end_0, end_mask = per_layer_slice_23_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_23_cast_fp16")]; + tensor var_7741 = const()[name = string("op_7741"), val = tensor([0, 2, 1])]; + tensor input_347_axes_0 = const()[name = string("input_347_axes_0"), val = tensor([2])]; + tensor var_7742 = transpose(perm = var_7741, x = hidden_states_115_cast_fp16)[name = string("transpose_92")]; + tensor input_347 = expand_dims(axes = input_347_axes_0, x = var_7742)[name = string("input_347")]; + string gated_67_pad_type_0 = const()[name = string("gated_67_pad_type_0"), val = string("valid")]; + tensor gated_67_strides_0 = const()[name = string("gated_67_strides_0"), val = tensor([1, 1])]; + tensor gated_67_pad_0 = const()[name = string("gated_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_67_dilations_0 = const()[name = string("gated_67_dilations_0"), val = tensor([1, 1])]; + int32 gated_67_groups_0 = const()[name = string("gated_67_groups_0"), val = int32(1)]; + tensor gated_67 = conv(dilations = gated_67_dilations_0, groups = gated_67_groups_0, pad = gated_67_pad_0, pad_type = gated_67_pad_type_0, strides = gated_67_strides_0, weight = layers_c2_11_per_layer_input_gate_weight_palettized, x = input_347)[name = string("gated_67")]; + string gated_69_mode_0 = const()[name = string("gated_69_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_69 = gelu(mode = gated_69_mode_0, x = gated_67)[name = string("gated_69")]; + tensor var_7761 = const()[name = string("op_7761"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_23_axes_0 = const()[name = string("per_layer_slice_conv_23_axes_0"), val = tensor([2])]; + tensor var_7762_cast_fp16 = transpose(perm = var_7761, x = per_layer_slice_23_cast_fp16)[name = string("transpose_91")]; + tensor per_layer_slice_conv_23_cast_fp16 = expand_dims(axes = per_layer_slice_conv_23_axes_0, x = var_7762_cast_fp16)[name = 
string("per_layer_slice_conv_23_cast_fp16")]; + tensor input_349_cast_fp16 = mul(x = gated_69, y = per_layer_slice_conv_23_cast_fp16)[name = string("input_349_cast_fp16")]; + string gated_71_pad_type_0 = const()[name = string("gated_71_pad_type_0"), val = string("valid")]; + tensor gated_71_strides_0 = const()[name = string("gated_71_strides_0"), val = tensor([1, 1])]; + tensor gated_71_pad_0 = const()[name = string("gated_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_71_dilations_0 = const()[name = string("gated_71_dilations_0"), val = tensor([1, 1])]; + int32 gated_71_groups_0 = const()[name = string("gated_71_groups_0"), val = int32(1)]; + tensor layers_c2_11_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955201920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955529664))))[name = string("layers_c2_11_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_71_cast_fp16 = conv(dilations = gated_71_dilations_0, groups = gated_71_groups_0, pad = gated_71_pad_0, pad_type = gated_71_pad_type_0, strides = gated_71_strides_0, weight = layers_c2_11_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_349_cast_fp16)[name = string("gated_71_cast_fp16")]; + tensor var_7778_axes_0 = const()[name = string("op_7778_axes_0"), val = tensor([2])]; + tensor var_7778_cast_fp16 = squeeze(axes = var_7778_axes_0, x = gated_71_cast_fp16)[name = string("op_7778_cast_fp16")]; + tensor var_7782 = const()[name = string("op_7782"), val = tensor([0, 2, 1])]; + int32 var_7788 = const()[name = string("op_7788"), val = int32(-1)]; + fp16 const_138_promoted_to_fp16 = const()[name = string("const_138_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_237_cast_fp16 = transpose(perm = var_7782, x = var_7778_cast_fp16)[name = string("transpose_90")]; + tensor var_7790_cast_fp16 = mul(x = 
x_237_cast_fp16, y = const_138_promoted_to_fp16)[name = string("op_7790_cast_fp16")]; + bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; + tensor input_351_cast_fp16 = concat(axis = var_7788, interleave = input_351_interleave_0, values = (x_237_cast_fp16, var_7790_cast_fp16))[name = string("input_351_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_7785_to_fp16 = const()[name = string("op_7785_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_7785_to_fp16, x = input_351_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor var_7795_split_sizes_0 = const()[name = string("op_7795_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7795_axis_0 = const()[name = string("op_7795_axis_0"), val = int32(-1)]; + tensor var_7795_cast_fp16_0, tensor var_7795_cast_fp16_1 = split(axis = var_7795_axis_0, split_sizes = var_7795_split_sizes_0, x = normed_333_cast_fp16)[name = string("op_7795_cast_fp16")]; + tensor layers_c2_11_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c2_11_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955532288)))]; + tensor hidden_states_119_cast_fp16 = mul(x = var_7795_cast_fp16_0, y = layers_c2_11_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_119_cast_fp16")]; + tensor hidden_states_121_cast_fp16 = add(x = hidden_states_115_cast_fp16, y = hidden_states_119_cast_fp16)[name = string("hidden_states_121_cast_fp16")]; + tensor const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = tensor([0x1.0cp-4])]; + tensor x_239_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_139_promoted_to_fp16)[name = string("x_239_cast_fp16")]; + tensor var_7807_axes_0 = const()[name = 
string("op_7807_axes_0"), val = tensor([0])]; + tensor var_7807_cast_fp16 = squeeze(axes = var_7807_axes_0, x = kv14_k)[name = string("op_7807_cast_fp16")]; + tensor var_7809_axes_0 = const()[name = string("op_7809_axes_0"), val = tensor([0])]; + tensor var_7809_cast_fp16 = squeeze(axes = var_7809_axes_0, x = kv14_v)[name = string("op_7809_cast_fp16")]; + int32 var_7814 = const()[name = string("op_7814"), val = int32(-1)]; + fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7816_cast_fp16 = mul(x = x_239_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_7816_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_7814, interleave = input_353_interleave_0, values = (x_239_cast_fp16, var_7816_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_7811_to_fp16 = const()[name = string("op_7811_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_7811_to_fp16, x = input_353_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor var_7821_split_sizes_0 = const()[name = string("op_7821_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_7821_axis_0 = const()[name = string("op_7821_axis_0"), val = int32(-1)]; + tensor var_7821_cast_fp16_0, tensor var_7821_cast_fp16_1 = split(axis = var_7821_axis_0, split_sizes = var_7821_split_sizes_0, x = normed_337_cast_fp16)[name = string("op_7821_cast_fp16")]; + tensor layers_c3_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955537472)))]; + tensor h_73_cast_fp16 = mul(x = var_7821_cast_fp16_0, y = 
layers_c3_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_73_cast_fp16")]; + tensor var_7827 = const()[name = string("op_7827"), val = tensor([0, 2, 1])]; + tensor var_7830_axes_0 = const()[name = string("op_7830_axes_0"), val = tensor([2])]; + tensor var_7828_cast_fp16 = transpose(perm = var_7827, x = h_73_cast_fp16)[name = string("transpose_89")]; + tensor var_7830_cast_fp16 = expand_dims(axes = var_7830_axes_0, x = var_7828_cast_fp16)[name = string("op_7830_cast_fp16")]; + string var_7846_pad_type_0 = const()[name = string("op_7846_pad_type_0"), val = string("valid")]; + tensor var_7846_strides_0 = const()[name = string("op_7846_strides_0"), val = tensor([1, 1])]; + tensor var_7846_pad_0 = const()[name = string("op_7846_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7846_dilations_0 = const()[name = string("op_7846_dilations_0"), val = tensor([1, 1])]; + int32 var_7846_groups_0 = const()[name = string("op_7846_groups_0"), val = int32(1)]; + tensor var_7846 = conv(dilations = var_7846_dilations_0, groups = var_7846_groups_0, pad = var_7846_pad_0, pad_type = var_7846_pad_type_0, strides = var_7846_strides_0, weight = layers_c3_0_self_attn_q_proj_weight_palettized, x = var_7830_cast_fp16)[name = string("op_7846")]; + tensor var_7851 = const()[name = string("op_7851"), val = tensor([1, 8, 256, 1])]; + tensor var_7852 = reshape(shape = var_7851, x = var_7846)[name = string("op_7852")]; + tensor var_7857 = const()[name = string("op_7857"), val = tensor([0, 1, 3, 2])]; + tensor var_7867 = const()[name = string("op_7867"), val = tensor([1, 8, 256])]; + tensor var_7858 = transpose(perm = var_7857, x = var_7852)[name = string("transpose_88")]; + tensor x_241 = reshape(shape = var_7867, x = var_7858)[name = string("x_241")]; + int32 var_7873 = const()[name = string("op_7873"), val = int32(-1)]; + fp16 const_141_promoted = const()[name = string("const_141_promoted"), val = fp16(-0x1p+0)]; + tensor var_7875 = mul(x = x_241, y = const_141_promoted)[name = 
string("op_7875")]; + bool input_357_interleave_0 = const()[name = string("input_357_interleave_0"), val = bool(false)]; + tensor input_357 = concat(axis = var_7873, interleave = input_357_interleave_0, values = (x_241, var_7875))[name = string("input_357")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_7870_to_fp16 = const()[name = string("op_7870_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_7870_to_fp16, x = input_357)[name = string("normed_341_cast_fp16")]; + tensor var_7880_split_sizes_0 = const()[name = string("op_7880_split_sizes_0"), val = tensor([256, 256])]; + int32 var_7880_axis_0 = const()[name = string("op_7880_axis_0"), val = int32(-1)]; + tensor var_7880_0, tensor var_7880_1 = split(axis = var_7880_axis_0, split_sizes = var_7880_split_sizes_0, x = normed_341_cast_fp16)[name = string("op_7880")]; + tensor var_7882 = mul(x = var_7880_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_7882")]; + tensor var_7887 = const()[name = string("op_7887"), val = tensor([1, 8, 1, 256])]; + tensor q_99 = reshape(shape = var_7887, x = var_7882)[name = string("q_99")]; + tensor var_7889_cast_fp16 = mul(x = q_99, y = cos_s)[name = string("op_7889_cast_fp16")]; + tensor var_7890_split_sizes_0 = const()[name = string("op_7890_split_sizes_0"), val = tensor([128, 128])]; + int32 var_7890_axis_0 = const()[name = string("op_7890_axis_0"), val = int32(-1)]; + tensor var_7890_0, tensor var_7890_1 = split(axis = var_7890_axis_0, split_sizes = var_7890_split_sizes_0, x = q_99)[name = string("op_7890")]; + fp16 const_142_promoted = const()[name = string("const_142_promoted"), val = fp16(-0x1p+0)]; + tensor var_7892 = mul(x = var_7890_1, y = const_142_promoted)[name = string("op_7892")]; + int32 var_7894 = const()[name = string("op_7894"), val = int32(-1)]; + bool var_7895_interleave_0 = const()[name = string("op_7895_interleave_0"), val 
= bool(false)]; + tensor var_7895 = concat(axis = var_7894, interleave = var_7895_interleave_0, values = (var_7892, var_7890_0))[name = string("op_7895")]; + tensor var_7896_cast_fp16 = mul(x = var_7895, y = sin_s)[name = string("op_7896_cast_fp16")]; + tensor q_101_cast_fp16 = add(x = var_7889_cast_fp16, y = var_7896_cast_fp16)[name = string("q_101_cast_fp16")]; + bool attn_weights_49_transpose_x_0 = const()[name = string("attn_weights_49_transpose_x_0"), val = bool(false)]; + bool attn_weights_49_transpose_y_0 = const()[name = string("attn_weights_49_transpose_y_0"), val = bool(false)]; + tensor attn_weights_49_cast_fp16 = matmul(transpose_x = attn_weights_49_transpose_x_0, transpose_y = attn_weights_49_transpose_y_0, x = q_101_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor x_243_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask_sliding)[name = string("x_243_cast_fp16")]; + tensor reduce_max_12_axes_0 = const()[name = string("reduce_max_12_axes_0"), val = tensor([-1])]; + bool reduce_max_12_keep_dims_0 = const()[name = string("reduce_max_12_keep_dims_0"), val = bool(true)]; + tensor reduce_max_12 = reduce_max(axes = reduce_max_12_axes_0, keep_dims = reduce_max_12_keep_dims_0, x = x_243_cast_fp16)[name = string("reduce_max_12")]; + tensor var_7928 = sub(x = x_243_cast_fp16, y = reduce_max_12)[name = string("op_7928")]; + tensor var_7934 = exp(x = var_7928)[name = string("op_7934")]; + tensor var_7944_axes_0 = const()[name = string("op_7944_axes_0"), val = tensor([-1])]; + bool var_7944_keep_dims_0 = const()[name = string("op_7944_keep_dims_0"), val = bool(true)]; + tensor var_7944 = reduce_sum(axes = var_7944_axes_0, keep_dims = var_7944_keep_dims_0, x = var_7934)[name = string("op_7944")]; + tensor var_7950_cast_fp16 = real_div(x = var_7934, y = var_7944)[name = string("op_7950_cast_fp16")]; + bool attn_output_73_transpose_x_0 = const()[name = string("attn_output_73_transpose_x_0"), val = bool(false)]; + 
bool attn_output_73_transpose_y_0 = const()[name = string("attn_output_73_transpose_y_0"), val = bool(false)]; + tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_0, transpose_y = attn_output_73_transpose_y_0, x = var_7950_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_73_cast_fp16")]; + tensor var_7961 = const()[name = string("op_7961"), val = tensor([0, 2, 1, 3])]; + tensor var_7968 = const()[name = string("op_7968"), val = tensor([1, 1, -1])]; + tensor var_7962_cast_fp16 = transpose(perm = var_7961, x = attn_output_73_cast_fp16)[name = string("transpose_87")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_7968, x = var_7962_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_7973 = const()[name = string("op_7973"), val = tensor([0, 2, 1])]; + string var_7989_pad_type_0 = const()[name = string("op_7989_pad_type_0"), val = string("valid")]; + int32 var_7989_groups_0 = const()[name = string("op_7989_groups_0"), val = int32(1)]; + tensor var_7989_strides_0 = const()[name = string("op_7989_strides_0"), val = tensor([1])]; + tensor var_7989_pad_0 = const()[name = string("op_7989_pad_0"), val = tensor([0, 0])]; + tensor var_7989_dilations_0 = const()[name = string("op_7989_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(955542656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958164160))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7974_cast_fp16 = transpose(perm = var_7973, x = attn_output_75_cast_fp16)[name = string("transpose_86")]; + tensor var_7989_cast_fp16 = conv(dilations = var_7989_dilations_0, groups = var_7989_groups_0, pad = var_7989_pad_0, pad_type = var_7989_pad_type_0, strides = var_7989_strides_0, weight = 
squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7974_cast_fp16)[name = string("op_7989_cast_fp16")]; + tensor var_7993 = const()[name = string("op_7993"), val = tensor([0, 2, 1])]; + int32 var_7999 = const()[name = string("op_7999"), val = int32(-1)]; + fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_247_cast_fp16 = transpose(perm = var_7993, x = var_7989_cast_fp16)[name = string("transpose_85")]; + tensor var_8001_cast_fp16 = mul(x = x_247_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_8001_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_7999, interleave = input_361_interleave_0, values = (x_247_cast_fp16, var_8001_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_7996_to_fp16 = const()[name = string("op_7996_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_7996_to_fp16, x = input_361_cast_fp16)[name = string("normed_345_cast_fp16")]; + tensor var_8006_split_sizes_0 = const()[name = string("op_8006_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8006_axis_0 = const()[name = string("op_8006_axis_0"), val = int32(-1)]; + tensor var_8006_cast_fp16_0, tensor var_8006_cast_fp16_1 = split(axis = var_8006_axis_0, split_sizes = var_8006_split_sizes_0, x = normed_345_cast_fp16)[name = string("op_8006_cast_fp16")]; + tensor layers_c3_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958166784)))]; + tensor attn_output_77_cast_fp16 = mul(x = var_8006_cast_fp16_0, y = 
layers_c3_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_77_cast_fp16")]; + tensor x_249_cast_fp16 = add(x = x_239_cast_fp16, y = attn_output_77_cast_fp16)[name = string("x_249_cast_fp16")]; + int32 var_8015 = const()[name = string("op_8015"), val = int32(-1)]; + fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8017_cast_fp16 = mul(x = x_249_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_8017_cast_fp16")]; + bool input_363_interleave_0 = const()[name = string("input_363_interleave_0"), val = bool(false)]; + tensor input_363_cast_fp16 = concat(axis = var_8015, interleave = input_363_interleave_0, values = (x_249_cast_fp16, var_8017_cast_fp16))[name = string("input_363_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_8012_to_fp16 = const()[name = string("op_8012_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_8012_to_fp16, x = input_363_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor var_8022_split_sizes_0 = const()[name = string("op_8022_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8022_axis_0 = const()[name = string("op_8022_axis_0"), val = int32(-1)]; + tensor var_8022_cast_fp16_0, tensor var_8022_cast_fp16_1 = split(axis = var_8022_axis_0, split_sizes = var_8022_split_sizes_0, x = normed_349_cast_fp16)[name = string("op_8022_cast_fp16")]; + tensor layers_c3_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958171968)))]; + tensor h_75_cast_fp16 = mul(x = var_8022_cast_fp16_0, y = layers_c3_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_75_cast_fp16")]; + tensor var_8033 = const()[name = 
string("op_8033"), val = tensor([0, 2, 1])]; + tensor input_365_axes_0 = const()[name = string("input_365_axes_0"), val = tensor([2])]; + tensor var_8034 = transpose(perm = var_8033, x = h_75_cast_fp16)[name = string("transpose_84")]; + tensor input_365 = expand_dims(axes = input_365_axes_0, x = var_8034)[name = string("input_365")]; + string gate_49_pad_type_0 = const()[name = string("gate_49_pad_type_0"), val = string("valid")]; + tensor gate_49_strides_0 = const()[name = string("gate_49_strides_0"), val = tensor([1, 1])]; + tensor gate_49_pad_0 = const()[name = string("gate_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_49_dilations_0 = const()[name = string("gate_49_dilations_0"), val = tensor([1, 1])]; + int32 gate_49_groups_0 = const()[name = string("gate_49_groups_0"), val = int32(1)]; + tensor gate_49 = conv(dilations = gate_49_dilations_0, groups = gate_49_groups_0, pad = gate_49_pad_0, pad_type = gate_49_pad_type_0, strides = gate_49_strides_0, weight = layers_c3_0_mlp_gate_proj_weight_palettized, x = input_365)[name = string("gate_49")]; + string up_25_pad_type_0 = const()[name = string("up_25_pad_type_0"), val = string("valid")]; + tensor up_25_strides_0 = const()[name = string("up_25_strides_0"), val = tensor([1, 1])]; + tensor up_25_pad_0 = const()[name = string("up_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_25_dilations_0 = const()[name = string("up_25_dilations_0"), val = tensor([1, 1])]; + int32 up_25_groups_0 = const()[name = string("up_25_groups_0"), val = int32(1)]; + tensor up_25 = conv(dilations = up_25_dilations_0, groups = up_25_groups_0, pad = up_25_pad_0, pad_type = up_25_pad_type_0, strides = up_25_strides_0, weight = layers_c3_0_mlp_up_proj_weight_palettized, x = input_365)[name = string("up_25")]; + string gate_51_mode_0 = const()[name = string("gate_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_51 = gelu(mode = gate_51_mode_0, x = gate_49)[name = string("gate_51")]; + tensor input_367 = mul(x = 
gate_51, y = up_25)[name = string("input_367")]; + string mlp_out_25_pad_type_0 = const()[name = string("mlp_out_25_pad_type_0"), val = string("valid")]; + tensor mlp_out_25_strides_0 = const()[name = string("mlp_out_25_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_25_pad_0 = const()[name = string("mlp_out_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_25_dilations_0 = const()[name = string("mlp_out_25_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_25_groups_0 = const()[name = string("mlp_out_25_groups_0"), val = int32(1)]; + tensor mlp_out_25 = conv(dilations = mlp_out_25_dilations_0, groups = mlp_out_25_groups_0, pad = mlp_out_25_pad_0, pad_type = mlp_out_25_pad_type_0, strides = mlp_out_25_strides_0, weight = layers_c3_0_mlp_down_proj_weight_palettized, x = input_367)[name = string("mlp_out_25")]; + tensor var_8074_axes_0 = const()[name = string("op_8074_axes_0"), val = tensor([2])]; + tensor var_8074 = squeeze(axes = var_8074_axes_0, x = mlp_out_25)[name = string("op_8074")]; + tensor var_8078 = const()[name = string("op_8078"), val = tensor([0, 2, 1])]; + int32 var_8084 = const()[name = string("op_8084"), val = int32(-1)]; + fp16 const_145_promoted = const()[name = string("const_145_promoted"), val = fp16(-0x1p+0)]; + tensor x_251 = transpose(perm = var_8078, x = var_8074)[name = string("transpose_83")]; + tensor var_8086 = mul(x = x_251, y = const_145_promoted)[name = string("op_8086")]; + bool input_369_interleave_0 = const()[name = string("input_369_interleave_0"), val = bool(false)]; + tensor input_369 = concat(axis = var_8084, interleave = input_369_interleave_0, values = (x_251, var_8086))[name = string("input_369")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_8081_to_fp16 = const()[name = string("op_8081_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_8081_to_fp16, x = input_369)[name = 
string("normed_353_cast_fp16")]; + tensor var_8091_split_sizes_0 = const()[name = string("op_8091_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8091_axis_0 = const()[name = string("op_8091_axis_0"), val = int32(-1)]; + tensor var_8091_0, tensor var_8091_1 = split(axis = var_8091_axis_0, split_sizes = var_8091_split_sizes_0, x = normed_353_cast_fp16)[name = string("op_8091")]; + tensor hidden_states_123 = mul(x = var_8091_0, y = layers_c3_0_post_feedforward_layernorm_weight)[name = string("hidden_states_123")]; + tensor hidden_states_125_cast_fp16 = add(x = x_249_cast_fp16, y = hidden_states_123)[name = string("hidden_states_125_cast_fp16")]; + tensor per_layer_slice_25_begin_0 = const()[name = string("per_layer_slice_25_begin_0"), val = tensor([0, 0, 6144])]; + tensor per_layer_slice_25_end_0 = const()[name = string("per_layer_slice_25_end_0"), val = tensor([1, 1, 6400])]; + tensor per_layer_slice_25_end_mask_0 = const()[name = string("per_layer_slice_25_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_25_cast_fp16 = slice_by_index(begin = per_layer_slice_25_begin_0, end = per_layer_slice_25_end_0, end_mask = per_layer_slice_25_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_25_cast_fp16")]; + tensor var_8119 = const()[name = string("op_8119"), val = tensor([0, 2, 1])]; + tensor input_371_axes_0 = const()[name = string("input_371_axes_0"), val = tensor([2])]; + tensor var_8120 = transpose(perm = var_8119, x = hidden_states_125_cast_fp16)[name = string("transpose_82")]; + tensor input_371 = expand_dims(axes = input_371_axes_0, x = var_8120)[name = string("input_371")]; + string gated_73_pad_type_0 = const()[name = string("gated_73_pad_type_0"), val = string("valid")]; + tensor gated_73_strides_0 = const()[name = string("gated_73_strides_0"), val = tensor([1, 1])]; + tensor gated_73_pad_0 = const()[name = string("gated_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_73_dilations_0 = const()[name = 
string("gated_73_dilations_0"), val = tensor([1, 1])]; + int32 gated_73_groups_0 = const()[name = string("gated_73_groups_0"), val = int32(1)]; + tensor gated_73 = conv(dilations = gated_73_dilations_0, groups = gated_73_groups_0, pad = gated_73_pad_0, pad_type = gated_73_pad_type_0, strides = gated_73_strides_0, weight = layers_c3_0_per_layer_input_gate_weight_palettized, x = input_371)[name = string("gated_73")]; + string gated_75_mode_0 = const()[name = string("gated_75_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_75 = gelu(mode = gated_75_mode_0, x = gated_73)[name = string("gated_75")]; + tensor var_8139 = const()[name = string("op_8139"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_25_axes_0 = const()[name = string("per_layer_slice_conv_25_axes_0"), val = tensor([2])]; + tensor var_8140_cast_fp16 = transpose(perm = var_8139, x = per_layer_slice_25_cast_fp16)[name = string("transpose_81")]; + tensor per_layer_slice_conv_25_cast_fp16 = expand_dims(axes = per_layer_slice_conv_25_axes_0, x = var_8140_cast_fp16)[name = string("per_layer_slice_conv_25_cast_fp16")]; + tensor input_373_cast_fp16 = mul(x = gated_75, y = per_layer_slice_conv_25_cast_fp16)[name = string("input_373_cast_fp16")]; + string gated_77_pad_type_0 = const()[name = string("gated_77_pad_type_0"), val = string("valid")]; + tensor gated_77_strides_0 = const()[name = string("gated_77_strides_0"), val = tensor([1, 1])]; + tensor gated_77_pad_0 = const()[name = string("gated_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_77_dilations_0 = const()[name = string("gated_77_dilations_0"), val = tensor([1, 1])]; + int32 gated_77_groups_0 = const()[name = string("gated_77_groups_0"), val = int32(1)]; + tensor layers_c3_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958177152))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(958504896))))[name = string("layers_c3_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_77_cast_fp16 = conv(dilations = gated_77_dilations_0, groups = gated_77_groups_0, pad = gated_77_pad_0, pad_type = gated_77_pad_type_0, strides = gated_77_strides_0, weight = layers_c3_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_373_cast_fp16)[name = string("gated_77_cast_fp16")]; + tensor var_8156_axes_0 = const()[name = string("op_8156_axes_0"), val = tensor([2])]; + tensor var_8156_cast_fp16 = squeeze(axes = var_8156_axes_0, x = gated_77_cast_fp16)[name = string("op_8156_cast_fp16")]; + tensor var_8160 = const()[name = string("op_8160"), val = tensor([0, 2, 1])]; + int32 var_8166 = const()[name = string("op_8166"), val = int32(-1)]; + fp16 const_146_promoted_to_fp16 = const()[name = string("const_146_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_253_cast_fp16 = transpose(perm = var_8160, x = var_8156_cast_fp16)[name = string("transpose_80")]; + tensor var_8168_cast_fp16 = mul(x = x_253_cast_fp16, y = const_146_promoted_to_fp16)[name = string("op_8168_cast_fp16")]; + bool input_375_interleave_0 = const()[name = string("input_375_interleave_0"), val = bool(false)]; + tensor input_375_cast_fp16 = concat(axis = var_8166, interleave = input_375_interleave_0, values = (x_253_cast_fp16, var_8168_cast_fp16))[name = string("input_375_cast_fp16")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_8163_to_fp16 = const()[name = string("op_8163_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_8163_to_fp16, x = input_375_cast_fp16)[name = string("normed_357_cast_fp16")]; + tensor var_8173_split_sizes_0 = const()[name = string("op_8173_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8173_axis_0 = const()[name = string("op_8173_axis_0"), val = 
int32(-1)]; + tensor var_8173_cast_fp16_0, tensor var_8173_cast_fp16_1 = split(axis = var_8173_axis_0, split_sizes = var_8173_split_sizes_0, x = normed_357_cast_fp16)[name = string("op_8173_cast_fp16")]; + tensor layers_c3_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958507520)))]; + tensor hidden_states_129_cast_fp16 = mul(x = var_8173_cast_fp16_0, y = layers_c3_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_129_cast_fp16")]; + tensor hidden_states_131_cast_fp16 = add(x = hidden_states_125_cast_fp16, y = hidden_states_129_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor const_147_promoted_to_fp16 = const()[name = string("const_147_promoted_to_fp16"), val = tensor([0x1.02p-1])]; + tensor x_255_cast_fp16 = mul(x = hidden_states_131_cast_fp16, y = const_147_promoted_to_fp16)[name = string("x_255_cast_fp16")]; + int32 var_8188 = const()[name = string("op_8188"), val = int32(-1)]; + fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8190_cast_fp16 = mul(x = x_255_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_8190_cast_fp16")]; + bool input_377_interleave_0 = const()[name = string("input_377_interleave_0"), val = bool(false)]; + tensor input_377_cast_fp16 = concat(axis = var_8188, interleave = input_377_interleave_0, values = (x_255_cast_fp16, var_8190_cast_fp16))[name = string("input_377_cast_fp16")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_8185_to_fp16 = const()[name = string("op_8185_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_8185_to_fp16, x = input_377_cast_fp16)[name = string("normed_361_cast_fp16")]; + tensor 
var_8195_split_sizes_0 = const()[name = string("op_8195_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8195_axis_0 = const()[name = string("op_8195_axis_0"), val = int32(-1)]; + tensor var_8195_cast_fp16_0, tensor var_8195_cast_fp16_1 = split(axis = var_8195_axis_0, split_sizes = var_8195_split_sizes_0, x = normed_361_cast_fp16)[name = string("op_8195_cast_fp16")]; + tensor layers_c3_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958512704)))]; + tensor h_79_cast_fp16 = mul(x = var_8195_cast_fp16_0, y = layers_c3_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_79_cast_fp16")]; + tensor var_8201 = const()[name = string("op_8201"), val = tensor([0, 2, 1])]; + tensor var_8204_axes_0 = const()[name = string("op_8204_axes_0"), val = tensor([2])]; + tensor var_8202_cast_fp16 = transpose(perm = var_8201, x = h_79_cast_fp16)[name = string("transpose_79")]; + tensor var_8204_cast_fp16 = expand_dims(axes = var_8204_axes_0, x = var_8202_cast_fp16)[name = string("op_8204_cast_fp16")]; + string var_8220_pad_type_0 = const()[name = string("op_8220_pad_type_0"), val = string("valid")]; + tensor var_8220_strides_0 = const()[name = string("op_8220_strides_0"), val = tensor([1, 1])]; + tensor var_8220_pad_0 = const()[name = string("op_8220_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8220_dilations_0 = const()[name = string("op_8220_dilations_0"), val = tensor([1, 1])]; + int32 var_8220_groups_0 = const()[name = string("op_8220_groups_0"), val = int32(1)]; + tensor var_8220 = conv(dilations = var_8220_dilations_0, groups = var_8220_groups_0, pad = var_8220_pad_0, pad_type = var_8220_pad_type_0, strides = var_8220_strides_0, weight = layers_c3_1_self_attn_q_proj_weight_palettized, x = var_8204_cast_fp16)[name = string("op_8220")]; + tensor var_8225 = const()[name = string("op_8225"), val = 
tensor([1, 8, 256, 1])]; + tensor var_8226 = reshape(shape = var_8225, x = var_8220)[name = string("op_8226")]; + tensor var_8231 = const()[name = string("op_8231"), val = tensor([0, 1, 3, 2])]; + tensor var_8241 = const()[name = string("op_8241"), val = tensor([1, 8, 256])]; + tensor var_8232 = transpose(perm = var_8231, x = var_8226)[name = string("transpose_78")]; + tensor x_257 = reshape(shape = var_8241, x = var_8232)[name = string("x_257")]; + int32 var_8247 = const()[name = string("op_8247"), val = int32(-1)]; + fp16 const_149_promoted = const()[name = string("const_149_promoted"), val = fp16(-0x1p+0)]; + tensor var_8249 = mul(x = x_257, y = const_149_promoted)[name = string("op_8249")]; + bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; + tensor input_381 = concat(axis = var_8247, interleave = input_381_interleave_0, values = (x_257, var_8249))[name = string("input_381")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_8244_to_fp16 = const()[name = string("op_8244_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_8244_to_fp16, x = input_381)[name = string("normed_365_cast_fp16")]; + tensor var_8254_split_sizes_0 = const()[name = string("op_8254_split_sizes_0"), val = tensor([256, 256])]; + int32 var_8254_axis_0 = const()[name = string("op_8254_axis_0"), val = int32(-1)]; + tensor var_8254_0, tensor var_8254_1 = split(axis = var_8254_axis_0, split_sizes = var_8254_split_sizes_0, x = normed_365_cast_fp16)[name = string("op_8254")]; + tensor var_8256 = mul(x = var_8254_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_8256")]; + tensor var_8261 = const()[name = string("op_8261"), val = tensor([1, 8, 1, 256])]; + tensor q_105 = reshape(shape = var_8261, x = var_8256)[name = string("q_105")]; + tensor var_8263_cast_fp16 = mul(x = q_105, y = cos_s)[name = 
string("op_8263_cast_fp16")]; + tensor var_8264_split_sizes_0 = const()[name = string("op_8264_split_sizes_0"), val = tensor([128, 128])]; + int32 var_8264_axis_0 = const()[name = string("op_8264_axis_0"), val = int32(-1)]; + tensor var_8264_0, tensor var_8264_1 = split(axis = var_8264_axis_0, split_sizes = var_8264_split_sizes_0, x = q_105)[name = string("op_8264")]; + fp16 const_150_promoted = const()[name = string("const_150_promoted"), val = fp16(-0x1p+0)]; + tensor var_8266 = mul(x = var_8264_1, y = const_150_promoted)[name = string("op_8266")]; + int32 var_8268 = const()[name = string("op_8268"), val = int32(-1)]; + bool var_8269_interleave_0 = const()[name = string("op_8269_interleave_0"), val = bool(false)]; + tensor var_8269 = concat(axis = var_8268, interleave = var_8269_interleave_0, values = (var_8266, var_8264_0))[name = string("op_8269")]; + tensor var_8270_cast_fp16 = mul(x = var_8269, y = sin_s)[name = string("op_8270_cast_fp16")]; + tensor q_107_cast_fp16 = add(x = var_8263_cast_fp16, y = var_8270_cast_fp16)[name = string("q_107_cast_fp16")]; + bool attn_weights_53_transpose_x_0 = const()[name = string("attn_weights_53_transpose_x_0"), val = bool(false)]; + bool attn_weights_53_transpose_y_0 = const()[name = string("attn_weights_53_transpose_y_0"), val = bool(false)]; + tensor attn_weights_53_cast_fp16 = matmul(transpose_x = attn_weights_53_transpose_x_0, transpose_y = attn_weights_53_transpose_y_0, x = q_107_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + tensor x_259_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = causal_mask_sliding)[name = string("x_259_cast_fp16")]; + tensor reduce_max_13_axes_0 = const()[name = string("reduce_max_13_axes_0"), val = tensor([-1])]; + bool reduce_max_13_keep_dims_0 = const()[name = string("reduce_max_13_keep_dims_0"), val = bool(true)]; + tensor reduce_max_13 = reduce_max(axes = reduce_max_13_axes_0, keep_dims = reduce_max_13_keep_dims_0, x = x_259_cast_fp16)[name = 
string("reduce_max_13")]; + tensor var_8302 = sub(x = x_259_cast_fp16, y = reduce_max_13)[name = string("op_8302")]; + tensor var_8308 = exp(x = var_8302)[name = string("op_8308")]; + tensor var_8318_axes_0 = const()[name = string("op_8318_axes_0"), val = tensor([-1])]; + bool var_8318_keep_dims_0 = const()[name = string("op_8318_keep_dims_0"), val = bool(true)]; + tensor var_8318 = reduce_sum(axes = var_8318_axes_0, keep_dims = var_8318_keep_dims_0, x = var_8308)[name = string("op_8318")]; + tensor var_8324_cast_fp16 = real_div(x = var_8308, y = var_8318)[name = string("op_8324_cast_fp16")]; + bool attn_output_79_transpose_x_0 = const()[name = string("attn_output_79_transpose_x_0"), val = bool(false)]; + bool attn_output_79_transpose_y_0 = const()[name = string("attn_output_79_transpose_y_0"), val = bool(false)]; + tensor attn_output_79_cast_fp16 = matmul(transpose_x = attn_output_79_transpose_x_0, transpose_y = attn_output_79_transpose_y_0, x = var_8324_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_79_cast_fp16")]; + tensor var_8335 = const()[name = string("op_8335"), val = tensor([0, 2, 1, 3])]; + tensor var_8342 = const()[name = string("op_8342"), val = tensor([1, 1, -1])]; + tensor var_8336_cast_fp16 = transpose(perm = var_8335, x = attn_output_79_cast_fp16)[name = string("transpose_77")]; + tensor attn_output_81_cast_fp16 = reshape(shape = var_8342, x = var_8336_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_8347 = const()[name = string("op_8347"), val = tensor([0, 2, 1])]; + string var_8363_pad_type_0 = const()[name = string("op_8363_pad_type_0"), val = string("valid")]; + int32 var_8363_groups_0 = const()[name = string("op_8363_groups_0"), val = int32(1)]; + tensor var_8363_strides_0 = const()[name = string("op_8363_strides_0"), val = tensor([1])]; + tensor var_8363_pad_0 = const()[name = string("op_8363_pad_0"), val = tensor([0, 0])]; + tensor var_8363_dilations_0 = const()[name = string("op_8363_dilations_0"), val 
= tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(958517888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961139392))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8348_cast_fp16 = transpose(perm = var_8347, x = attn_output_81_cast_fp16)[name = string("transpose_76")]; + tensor var_8363_cast_fp16 = conv(dilations = var_8363_dilations_0, groups = var_8363_groups_0, pad = var_8363_pad_0, pad_type = var_8363_pad_type_0, strides = var_8363_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_8348_cast_fp16)[name = string("op_8363_cast_fp16")]; + tensor var_8367 = const()[name = string("op_8367"), val = tensor([0, 2, 1])]; + int32 var_8373 = const()[name = string("op_8373"), val = int32(-1)]; + fp16 const_151_promoted_to_fp16 = const()[name = string("const_151_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_263_cast_fp16 = transpose(perm = var_8367, x = var_8363_cast_fp16)[name = string("transpose_75")]; + tensor var_8375_cast_fp16 = mul(x = x_263_cast_fp16, y = const_151_promoted_to_fp16)[name = string("op_8375_cast_fp16")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385_cast_fp16 = concat(axis = var_8373, interleave = input_385_interleave_0, values = (x_263_cast_fp16, var_8375_cast_fp16))[name = string("input_385_cast_fp16")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_8370_to_fp16 = const()[name = string("op_8370_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_8370_to_fp16, x = input_385_cast_fp16)[name = string("normed_369_cast_fp16")]; + tensor var_8380_split_sizes_0 = const()[name = string("op_8380_split_sizes_0"), val 
= tensor([2560, 2560])]; + int32 var_8380_axis_0 = const()[name = string("op_8380_axis_0"), val = int32(-1)]; + tensor var_8380_cast_fp16_0, tensor var_8380_cast_fp16_1 = split(axis = var_8380_axis_0, split_sizes = var_8380_split_sizes_0, x = normed_369_cast_fp16)[name = string("op_8380_cast_fp16")]; + tensor layers_c3_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961142016)))]; + tensor attn_output_83_cast_fp16 = mul(x = var_8380_cast_fp16_0, y = layers_c3_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_83_cast_fp16")]; + tensor x_265_cast_fp16 = add(x = x_255_cast_fp16, y = attn_output_83_cast_fp16)[name = string("x_265_cast_fp16")]; + int32 var_8389 = const()[name = string("op_8389"), val = int32(-1)]; + fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8391_cast_fp16 = mul(x = x_265_cast_fp16, y = const_152_promoted_to_fp16)[name = string("op_8391_cast_fp16")]; + bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; + tensor input_387_cast_fp16 = concat(axis = var_8389, interleave = input_387_interleave_0, values = (x_265_cast_fp16, var_8391_cast_fp16))[name = string("input_387_cast_fp16")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_8386_to_fp16 = const()[name = string("op_8386_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_8386_to_fp16, x = input_387_cast_fp16)[name = string("normed_373_cast_fp16")]; + tensor var_8396_split_sizes_0 = const()[name = string("op_8396_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8396_axis_0 = const()[name = string("op_8396_axis_0"), val = int32(-1)]; + tensor 
var_8396_cast_fp16_0, tensor var_8396_cast_fp16_1 = split(axis = var_8396_axis_0, split_sizes = var_8396_split_sizes_0, x = normed_373_cast_fp16)[name = string("op_8396_cast_fp16")]; + tensor layers_c3_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961147200)))]; + tensor h_81_cast_fp16 = mul(x = var_8396_cast_fp16_0, y = layers_c3_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_81_cast_fp16")]; + tensor var_8407 = const()[name = string("op_8407"), val = tensor([0, 2, 1])]; + tensor input_389_axes_0 = const()[name = string("input_389_axes_0"), val = tensor([2])]; + tensor var_8408 = transpose(perm = var_8407, x = h_81_cast_fp16)[name = string("transpose_74")]; + tensor input_389 = expand_dims(axes = input_389_axes_0, x = var_8408)[name = string("input_389")]; + string gate_53_pad_type_0 = const()[name = string("gate_53_pad_type_0"), val = string("valid")]; + tensor gate_53_strides_0 = const()[name = string("gate_53_strides_0"), val = tensor([1, 1])]; + tensor gate_53_pad_0 = const()[name = string("gate_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_53_dilations_0 = const()[name = string("gate_53_dilations_0"), val = tensor([1, 1])]; + int32 gate_53_groups_0 = const()[name = string("gate_53_groups_0"), val = int32(1)]; + tensor gate_53 = conv(dilations = gate_53_dilations_0, groups = gate_53_groups_0, pad = gate_53_pad_0, pad_type = gate_53_pad_type_0, strides = gate_53_strides_0, weight = layers_c3_1_mlp_gate_proj_weight_palettized, x = input_389)[name = string("gate_53")]; + string up_27_pad_type_0 = const()[name = string("up_27_pad_type_0"), val = string("valid")]; + tensor up_27_strides_0 = const()[name = string("up_27_strides_0"), val = tensor([1, 1])]; + tensor up_27_pad_0 = const()[name = string("up_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
up_27_dilations_0 = const()[name = string("up_27_dilations_0"), val = tensor([1, 1])]; + int32 up_27_groups_0 = const()[name = string("up_27_groups_0"), val = int32(1)]; + tensor up_27 = conv(dilations = up_27_dilations_0, groups = up_27_groups_0, pad = up_27_pad_0, pad_type = up_27_pad_type_0, strides = up_27_strides_0, weight = layers_c3_1_mlp_up_proj_weight_palettized, x = input_389)[name = string("up_27")]; + string gate_55_mode_0 = const()[name = string("gate_55_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_55 = gelu(mode = gate_55_mode_0, x = gate_53)[name = string("gate_55")]; + tensor input_391 = mul(x = gate_55, y = up_27)[name = string("input_391")]; + string mlp_out_27_pad_type_0 = const()[name = string("mlp_out_27_pad_type_0"), val = string("valid")]; + tensor mlp_out_27_strides_0 = const()[name = string("mlp_out_27_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_27_pad_0 = const()[name = string("mlp_out_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_27_dilations_0 = const()[name = string("mlp_out_27_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_27_groups_0 = const()[name = string("mlp_out_27_groups_0"), val = int32(1)]; + tensor mlp_out_27 = conv(dilations = mlp_out_27_dilations_0, groups = mlp_out_27_groups_0, pad = mlp_out_27_pad_0, pad_type = mlp_out_27_pad_type_0, strides = mlp_out_27_strides_0, weight = layers_c3_1_mlp_down_proj_weight_palettized, x = input_391)[name = string("mlp_out_27")]; + tensor var_8448_axes_0 = const()[name = string("op_8448_axes_0"), val = tensor([2])]; + tensor var_8448 = squeeze(axes = var_8448_axes_0, x = mlp_out_27)[name = string("op_8448")]; + tensor var_8452 = const()[name = string("op_8452"), val = tensor([0, 2, 1])]; + int32 var_8458 = const()[name = string("op_8458"), val = int32(-1)]; + fp16 const_153_promoted = const()[name = string("const_153_promoted"), val = fp16(-0x1p+0)]; + tensor x_267 = transpose(perm = var_8452, x = var_8448)[name = string("transpose_73")]; + tensor 
var_8460 = mul(x = x_267, y = const_153_promoted)[name = string("op_8460")]; + bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; + tensor input_393 = concat(axis = var_8458, interleave = input_393_interleave_0, values = (x_267, var_8460))[name = string("input_393")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_8455_to_fp16 = const()[name = string("op_8455_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_8455_to_fp16, x = input_393)[name = string("normed_377_cast_fp16")]; + tensor var_8465_split_sizes_0 = const()[name = string("op_8465_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8465_axis_0 = const()[name = string("op_8465_axis_0"), val = int32(-1)]; + tensor var_8465_0, tensor var_8465_1 = split(axis = var_8465_axis_0, split_sizes = var_8465_split_sizes_0, x = normed_377_cast_fp16)[name = string("op_8465")]; + tensor hidden_states_133 = mul(x = var_8465_0, y = layers_c3_1_post_feedforward_layernorm_weight)[name = string("hidden_states_133")]; + tensor hidden_states_135_cast_fp16 = add(x = x_265_cast_fp16, y = hidden_states_133)[name = string("hidden_states_135_cast_fp16")]; + tensor per_layer_slice_27_begin_0 = const()[name = string("per_layer_slice_27_begin_0"), val = tensor([0, 0, 6400])]; + tensor per_layer_slice_27_end_0 = const()[name = string("per_layer_slice_27_end_0"), val = tensor([1, 1, 6656])]; + tensor per_layer_slice_27_end_mask_0 = const()[name = string("per_layer_slice_27_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_27_cast_fp16 = slice_by_index(begin = per_layer_slice_27_begin_0, end = per_layer_slice_27_end_0, end_mask = per_layer_slice_27_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_27_cast_fp16")]; + tensor var_8493 = const()[name = string("op_8493"), val = tensor([0, 2, 1])]; + tensor input_395_axes_0 = 
const()[name = string("input_395_axes_0"), val = tensor([2])]; + tensor var_8494 = transpose(perm = var_8493, x = hidden_states_135_cast_fp16)[name = string("transpose_72")]; + tensor input_395 = expand_dims(axes = input_395_axes_0, x = var_8494)[name = string("input_395")]; + string gated_79_pad_type_0 = const()[name = string("gated_79_pad_type_0"), val = string("valid")]; + tensor gated_79_strides_0 = const()[name = string("gated_79_strides_0"), val = tensor([1, 1])]; + tensor gated_79_pad_0 = const()[name = string("gated_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_79_dilations_0 = const()[name = string("gated_79_dilations_0"), val = tensor([1, 1])]; + int32 gated_79_groups_0 = const()[name = string("gated_79_groups_0"), val = int32(1)]; + tensor gated_79 = conv(dilations = gated_79_dilations_0, groups = gated_79_groups_0, pad = gated_79_pad_0, pad_type = gated_79_pad_type_0, strides = gated_79_strides_0, weight = layers_c3_1_per_layer_input_gate_weight_palettized, x = input_395)[name = string("gated_79")]; + string gated_81_mode_0 = const()[name = string("gated_81_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_81 = gelu(mode = gated_81_mode_0, x = gated_79)[name = string("gated_81")]; + tensor var_8513 = const()[name = string("op_8513"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_27_axes_0 = const()[name = string("per_layer_slice_conv_27_axes_0"), val = tensor([2])]; + tensor var_8514_cast_fp16 = transpose(perm = var_8513, x = per_layer_slice_27_cast_fp16)[name = string("transpose_71")]; + tensor per_layer_slice_conv_27_cast_fp16 = expand_dims(axes = per_layer_slice_conv_27_axes_0, x = var_8514_cast_fp16)[name = string("per_layer_slice_conv_27_cast_fp16")]; + tensor input_397_cast_fp16 = mul(x = gated_81, y = per_layer_slice_conv_27_cast_fp16)[name = string("input_397_cast_fp16")]; + string gated_83_pad_type_0 = const()[name = string("gated_83_pad_type_0"), val = string("valid")]; + tensor gated_83_strides_0 = const()[name 
= string("gated_83_strides_0"), val = tensor([1, 1])]; + tensor gated_83_pad_0 = const()[name = string("gated_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_83_dilations_0 = const()[name = string("gated_83_dilations_0"), val = tensor([1, 1])]; + int32 gated_83_groups_0 = const()[name = string("gated_83_groups_0"), val = int32(1)]; + tensor layers_c3_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961152384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961480128))))[name = string("layers_c3_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_83_cast_fp16 = conv(dilations = gated_83_dilations_0, groups = gated_83_groups_0, pad = gated_83_pad_0, pad_type = gated_83_pad_type_0, strides = gated_83_strides_0, weight = layers_c3_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("gated_83_cast_fp16")]; + tensor var_8530_axes_0 = const()[name = string("op_8530_axes_0"), val = tensor([2])]; + tensor var_8530_cast_fp16 = squeeze(axes = var_8530_axes_0, x = gated_83_cast_fp16)[name = string("op_8530_cast_fp16")]; + tensor var_8534 = const()[name = string("op_8534"), val = tensor([0, 2, 1])]; + int32 var_8540 = const()[name = string("op_8540"), val = int32(-1)]; + fp16 const_154_promoted_to_fp16 = const()[name = string("const_154_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_269_cast_fp16 = transpose(perm = var_8534, x = var_8530_cast_fp16)[name = string("transpose_70")]; + tensor var_8542_cast_fp16 = mul(x = x_269_cast_fp16, y = const_154_promoted_to_fp16)[name = string("op_8542_cast_fp16")]; + bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; + tensor input_399_cast_fp16 = concat(axis = var_8540, interleave = input_399_interleave_0, values = (x_269_cast_fp16, 
var_8542_cast_fp16))[name = string("input_399_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_8537_to_fp16 = const()[name = string("op_8537_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_8537_to_fp16, x = input_399_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor var_8547_split_sizes_0 = const()[name = string("op_8547_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8547_axis_0 = const()[name = string("op_8547_axis_0"), val = int32(-1)]; + tensor var_8547_cast_fp16_0, tensor var_8547_cast_fp16_1 = split(axis = var_8547_axis_0, split_sizes = var_8547_split_sizes_0, x = normed_381_cast_fp16)[name = string("op_8547_cast_fp16")]; + tensor layers_c3_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961482752)))]; + tensor hidden_states_139_cast_fp16 = mul(x = var_8547_cast_fp16_0, y = layers_c3_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_139_cast_fp16")]; + tensor hidden_states_141_cast_fp16 = add(x = hidden_states_135_cast_fp16, y = hidden_states_139_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; + tensor const_155_promoted_to_fp16 = const()[name = string("const_155_promoted_to_fp16"), val = tensor([0x1.6ep-1])]; + tensor x_271_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_155_promoted_to_fp16)[name = string("x_271_cast_fp16")]; + int32 var_8562 = const()[name = string("op_8562"), val = int32(-1)]; + fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8564_cast_fp16 = mul(x = x_271_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_8564_cast_fp16")]; + bool input_401_interleave_0 = const()[name 
= string("input_401_interleave_0"), val = bool(false)]; + tensor input_401_cast_fp16 = concat(axis = var_8562, interleave = input_401_interleave_0, values = (x_271_cast_fp16, var_8564_cast_fp16))[name = string("input_401_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_8559_to_fp16 = const()[name = string("op_8559_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_8559_to_fp16, x = input_401_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor var_8569_split_sizes_0 = const()[name = string("op_8569_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8569_axis_0 = const()[name = string("op_8569_axis_0"), val = int32(-1)]; + tensor var_8569_cast_fp16_0, tensor var_8569_cast_fp16_1 = split(axis = var_8569_axis_0, split_sizes = var_8569_split_sizes_0, x = normed_385_cast_fp16)[name = string("op_8569_cast_fp16")]; + tensor layers_c3_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961487936)))]; + tensor h_85_cast_fp16 = mul(x = var_8569_cast_fp16_0, y = layers_c3_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_85_cast_fp16")]; + tensor var_8575 = const()[name = string("op_8575"), val = tensor([0, 2, 1])]; + tensor var_8578_axes_0 = const()[name = string("op_8578_axes_0"), val = tensor([2])]; + tensor var_8576_cast_fp16 = transpose(perm = var_8575, x = h_85_cast_fp16)[name = string("transpose_69")]; + tensor var_8578_cast_fp16 = expand_dims(axes = var_8578_axes_0, x = var_8576_cast_fp16)[name = string("op_8578_cast_fp16")]; + string var_8594_pad_type_0 = const()[name = string("op_8594_pad_type_0"), val = string("valid")]; + tensor var_8594_strides_0 = const()[name = string("op_8594_strides_0"), val = tensor([1, 1])]; + tensor var_8594_pad_0 = 
const()[name = string("op_8594_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8594_dilations_0 = const()[name = string("op_8594_dilations_0"), val = tensor([1, 1])]; + int32 var_8594_groups_0 = const()[name = string("op_8594_groups_0"), val = int32(1)]; + tensor var_8594 = conv(dilations = var_8594_dilations_0, groups = var_8594_groups_0, pad = var_8594_pad_0, pad_type = var_8594_pad_type_0, strides = var_8594_strides_0, weight = layers_c3_2_self_attn_q_proj_weight_palettized, x = var_8578_cast_fp16)[name = string("op_8594")]; + tensor var_8599 = const()[name = string("op_8599"), val = tensor([1, 8, 256, 1])]; + tensor var_8600 = reshape(shape = var_8599, x = var_8594)[name = string("op_8600")]; + tensor var_8605 = const()[name = string("op_8605"), val = tensor([0, 1, 3, 2])]; + tensor var_8615 = const()[name = string("op_8615"), val = tensor([1, 8, 256])]; + tensor var_8606 = transpose(perm = var_8605, x = var_8600)[name = string("transpose_68")]; + tensor x_273 = reshape(shape = var_8615, x = var_8606)[name = string("x_273")]; + int32 var_8621 = const()[name = string("op_8621"), val = int32(-1)]; + fp16 const_157_promoted = const()[name = string("const_157_promoted"), val = fp16(-0x1p+0)]; + tensor var_8623 = mul(x = x_273, y = const_157_promoted)[name = string("op_8623")]; + bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; + tensor input_405 = concat(axis = var_8621, interleave = input_405_interleave_0, values = (x_273, var_8623))[name = string("input_405")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_8618_to_fp16 = const()[name = string("op_8618_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_8618_to_fp16, x = input_405)[name = string("normed_389_cast_fp16")]; + tensor var_8628_split_sizes_0 = const()[name = string("op_8628_split_sizes_0"), val = tensor([256, 256])]; + int32 
var_8628_axis_0 = const()[name = string("op_8628_axis_0"), val = int32(-1)]; + tensor var_8628_0, tensor var_8628_1 = split(axis = var_8628_axis_0, split_sizes = var_8628_split_sizes_0, x = normed_389_cast_fp16)[name = string("op_8628")]; + tensor var_8630 = mul(x = var_8628_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_8630")]; + tensor var_8635 = const()[name = string("op_8635"), val = tensor([1, 8, 1, 256])]; + tensor q_111 = reshape(shape = var_8635, x = var_8630)[name = string("q_111")]; + tensor var_8637_cast_fp16 = mul(x = q_111, y = cos_s)[name = string("op_8637_cast_fp16")]; + tensor var_8638_split_sizes_0 = const()[name = string("op_8638_split_sizes_0"), val = tensor([128, 128])]; + int32 var_8638_axis_0 = const()[name = string("op_8638_axis_0"), val = int32(-1)]; + tensor var_8638_0, tensor var_8638_1 = split(axis = var_8638_axis_0, split_sizes = var_8638_split_sizes_0, x = q_111)[name = string("op_8638")]; + fp16 const_158_promoted = const()[name = string("const_158_promoted"), val = fp16(-0x1p+0)]; + tensor var_8640 = mul(x = var_8638_1, y = const_158_promoted)[name = string("op_8640")]; + int32 var_8642 = const()[name = string("op_8642"), val = int32(-1)]; + bool var_8643_interleave_0 = const()[name = string("op_8643_interleave_0"), val = bool(false)]; + tensor var_8643 = concat(axis = var_8642, interleave = var_8643_interleave_0, values = (var_8640, var_8638_0))[name = string("op_8643")]; + tensor var_8644_cast_fp16 = mul(x = var_8643, y = sin_s)[name = string("op_8644_cast_fp16")]; + tensor q_113_cast_fp16 = add(x = var_8637_cast_fp16, y = var_8644_cast_fp16)[name = string("q_113_cast_fp16")]; + bool attn_weights_57_transpose_x_0 = const()[name = string("attn_weights_57_transpose_x_0"), val = bool(false)]; + bool attn_weights_57_transpose_y_0 = const()[name = string("attn_weights_57_transpose_y_0"), val = bool(false)]; + tensor attn_weights_57_cast_fp16 = matmul(transpose_x = attn_weights_57_transpose_x_0, transpose_y = 
attn_weights_57_transpose_y_0, x = q_113_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_57_cast_fp16")]; + tensor x_275_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = causal_mask_sliding)[name = string("x_275_cast_fp16")]; + tensor reduce_max_14_axes_0 = const()[name = string("reduce_max_14_axes_0"), val = tensor([-1])]; + bool reduce_max_14_keep_dims_0 = const()[name = string("reduce_max_14_keep_dims_0"), val = bool(true)]; + tensor reduce_max_14 = reduce_max(axes = reduce_max_14_axes_0, keep_dims = reduce_max_14_keep_dims_0, x = x_275_cast_fp16)[name = string("reduce_max_14")]; + tensor var_8676 = sub(x = x_275_cast_fp16, y = reduce_max_14)[name = string("op_8676")]; + tensor var_8682 = exp(x = var_8676)[name = string("op_8682")]; + tensor var_8692_axes_0 = const()[name = string("op_8692_axes_0"), val = tensor([-1])]; + bool var_8692_keep_dims_0 = const()[name = string("op_8692_keep_dims_0"), val = bool(true)]; + tensor var_8692 = reduce_sum(axes = var_8692_axes_0, keep_dims = var_8692_keep_dims_0, x = var_8682)[name = string("op_8692")]; + tensor var_8698_cast_fp16 = real_div(x = var_8682, y = var_8692)[name = string("op_8698_cast_fp16")]; + bool attn_output_85_transpose_x_0 = const()[name = string("attn_output_85_transpose_x_0"), val = bool(false)]; + bool attn_output_85_transpose_y_0 = const()[name = string("attn_output_85_transpose_y_0"), val = bool(false)]; + tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_0, transpose_y = attn_output_85_transpose_y_0, x = var_8698_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_8709 = const()[name = string("op_8709"), val = tensor([0, 2, 1, 3])]; + tensor var_8716 = const()[name = string("op_8716"), val = tensor([1, 1, -1])]; + tensor var_8710_cast_fp16 = transpose(perm = var_8709, x = attn_output_85_cast_fp16)[name = string("transpose_67")]; + tensor attn_output_87_cast_fp16 = reshape(shape = var_8716, x = 
var_8710_cast_fp16)[name = string("attn_output_87_cast_fp16")]; + tensor var_8721 = const()[name = string("op_8721"), val = tensor([0, 2, 1])]; + string var_8737_pad_type_0 = const()[name = string("op_8737_pad_type_0"), val = string("valid")]; + int32 var_8737_groups_0 = const()[name = string("op_8737_groups_0"), val = int32(1)]; + tensor var_8737_strides_0 = const()[name = string("op_8737_strides_0"), val = tensor([1])]; + tensor var_8737_pad_0 = const()[name = string("op_8737_pad_0"), val = tensor([0, 0])]; + tensor var_8737_dilations_0 = const()[name = string("op_8737_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(961493120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964114624))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8722_cast_fp16 = transpose(perm = var_8721, x = attn_output_87_cast_fp16)[name = string("transpose_66")]; + tensor var_8737_cast_fp16 = conv(dilations = var_8737_dilations_0, groups = var_8737_groups_0, pad = var_8737_pad_0, pad_type = var_8737_pad_type_0, strides = var_8737_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_8722_cast_fp16)[name = string("op_8737_cast_fp16")]; + tensor var_8741 = const()[name = string("op_8741"), val = tensor([0, 2, 1])]; + int32 var_8747 = const()[name = string("op_8747"), val = int32(-1)]; + fp16 const_159_promoted_to_fp16 = const()[name = string("const_159_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_279_cast_fp16 = transpose(perm = var_8741, x = var_8737_cast_fp16)[name = string("transpose_65")]; + tensor var_8749_cast_fp16 = mul(x = x_279_cast_fp16, y = const_159_promoted_to_fp16)[name = string("op_8749_cast_fp16")]; + bool input_409_interleave_0 = const()[name = string("input_409_interleave_0"), val = bool(false)]; + tensor 
input_409_cast_fp16 = concat(axis = var_8747, interleave = input_409_interleave_0, values = (x_279_cast_fp16, var_8749_cast_fp16))[name = string("input_409_cast_fp16")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_8744_to_fp16 = const()[name = string("op_8744_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_8744_to_fp16, x = input_409_cast_fp16)[name = string("normed_393_cast_fp16")]; + tensor var_8754_split_sizes_0 = const()[name = string("op_8754_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8754_axis_0 = const()[name = string("op_8754_axis_0"), val = int32(-1)]; + tensor var_8754_cast_fp16_0, tensor var_8754_cast_fp16_1 = split(axis = var_8754_axis_0, split_sizes = var_8754_split_sizes_0, x = normed_393_cast_fp16)[name = string("op_8754_cast_fp16")]; + tensor layers_c3_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964117248)))]; + tensor attn_output_89_cast_fp16 = mul(x = var_8754_cast_fp16_0, y = layers_c3_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor x_281_cast_fp16 = add(x = x_271_cast_fp16, y = attn_output_89_cast_fp16)[name = string("x_281_cast_fp16")]; + int32 var_8763 = const()[name = string("op_8763"), val = int32(-1)]; + fp16 const_160_promoted_to_fp16 = const()[name = string("const_160_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8765_cast_fp16 = mul(x = x_281_cast_fp16, y = const_160_promoted_to_fp16)[name = string("op_8765_cast_fp16")]; + bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; + tensor input_411_cast_fp16 = concat(axis = var_8763, interleave = input_411_interleave_0, values = (x_281_cast_fp16, 
var_8765_cast_fp16))[name = string("input_411_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_8760_to_fp16 = const()[name = string("op_8760_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_8760_to_fp16, x = input_411_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor var_8770_split_sizes_0 = const()[name = string("op_8770_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8770_axis_0 = const()[name = string("op_8770_axis_0"), val = int32(-1)]; + tensor var_8770_cast_fp16_0, tensor var_8770_cast_fp16_1 = split(axis = var_8770_axis_0, split_sizes = var_8770_split_sizes_0, x = normed_397_cast_fp16)[name = string("op_8770_cast_fp16")]; + tensor layers_c3_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964122432)))]; + tensor h_87_cast_fp16 = mul(x = var_8770_cast_fp16_0, y = layers_c3_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_87_cast_fp16")]; + tensor var_8781 = const()[name = string("op_8781"), val = tensor([0, 2, 1])]; + tensor input_413_axes_0 = const()[name = string("input_413_axes_0"), val = tensor([2])]; + tensor var_8782 = transpose(perm = var_8781, x = h_87_cast_fp16)[name = string("transpose_64")]; + tensor input_413 = expand_dims(axes = input_413_axes_0, x = var_8782)[name = string("input_413")]; + string gate_57_pad_type_0 = const()[name = string("gate_57_pad_type_0"), val = string("valid")]; + tensor gate_57_strides_0 = const()[name = string("gate_57_strides_0"), val = tensor([1, 1])]; + tensor gate_57_pad_0 = const()[name = string("gate_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_57_dilations_0 = const()[name = string("gate_57_dilations_0"), val = tensor([1, 1])]; + int32 
gate_57_groups_0 = const()[name = string("gate_57_groups_0"), val = int32(1)]; + tensor gate_57 = conv(dilations = gate_57_dilations_0, groups = gate_57_groups_0, pad = gate_57_pad_0, pad_type = gate_57_pad_type_0, strides = gate_57_strides_0, weight = layers_c3_2_mlp_gate_proj_weight_palettized, x = input_413)[name = string("gate_57")]; + string up_29_pad_type_0 = const()[name = string("up_29_pad_type_0"), val = string("valid")]; + tensor up_29_strides_0 = const()[name = string("up_29_strides_0"), val = tensor([1, 1])]; + tensor up_29_pad_0 = const()[name = string("up_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_29_dilations_0 = const()[name = string("up_29_dilations_0"), val = tensor([1, 1])]; + int32 up_29_groups_0 = const()[name = string("up_29_groups_0"), val = int32(1)]; + tensor up_29 = conv(dilations = up_29_dilations_0, groups = up_29_groups_0, pad = up_29_pad_0, pad_type = up_29_pad_type_0, strides = up_29_strides_0, weight = layers_c3_2_mlp_up_proj_weight_palettized, x = input_413)[name = string("up_29")]; + string gate_59_mode_0 = const()[name = string("gate_59_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_59 = gelu(mode = gate_59_mode_0, x = gate_57)[name = string("gate_59")]; + tensor input_415 = mul(x = gate_59, y = up_29)[name = string("input_415")]; + string mlp_out_29_pad_type_0 = const()[name = string("mlp_out_29_pad_type_0"), val = string("valid")]; + tensor mlp_out_29_strides_0 = const()[name = string("mlp_out_29_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_29_pad_0 = const()[name = string("mlp_out_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_29_dilations_0 = const()[name = string("mlp_out_29_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_29_groups_0 = const()[name = string("mlp_out_29_groups_0"), val = int32(1)]; + tensor mlp_out_29 = conv(dilations = mlp_out_29_dilations_0, groups = mlp_out_29_groups_0, pad = mlp_out_29_pad_0, pad_type = mlp_out_29_pad_type_0, strides = mlp_out_29_strides_0, 
weight = layers_c3_2_mlp_down_proj_weight_palettized, x = input_415)[name = string("mlp_out_29")]; + tensor var_8822_axes_0 = const()[name = string("op_8822_axes_0"), val = tensor([2])]; + tensor var_8822 = squeeze(axes = var_8822_axes_0, x = mlp_out_29)[name = string("op_8822")]; + tensor var_8826 = const()[name = string("op_8826"), val = tensor([0, 2, 1])]; + int32 var_8832 = const()[name = string("op_8832"), val = int32(-1)]; + fp16 const_161_promoted = const()[name = string("const_161_promoted"), val = fp16(-0x1p+0)]; + tensor x_283 = transpose(perm = var_8826, x = var_8822)[name = string("transpose_63")]; + tensor var_8834 = mul(x = x_283, y = const_161_promoted)[name = string("op_8834")]; + bool input_417_interleave_0 = const()[name = string("input_417_interleave_0"), val = bool(false)]; + tensor input_417 = concat(axis = var_8832, interleave = input_417_interleave_0, values = (x_283, var_8834))[name = string("input_417")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_8829_to_fp16 = const()[name = string("op_8829_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_8829_to_fp16, x = input_417)[name = string("normed_401_cast_fp16")]; + tensor var_8839_split_sizes_0 = const()[name = string("op_8839_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8839_axis_0 = const()[name = string("op_8839_axis_0"), val = int32(-1)]; + tensor var_8839_0, tensor var_8839_1 = split(axis = var_8839_axis_0, split_sizes = var_8839_split_sizes_0, x = normed_401_cast_fp16)[name = string("op_8839")]; + tensor hidden_states_143 = mul(x = var_8839_0, y = layers_c3_2_post_feedforward_layernorm_weight)[name = string("hidden_states_143")]; + tensor hidden_states_145_cast_fp16 = add(x = x_281_cast_fp16, y = hidden_states_143)[name = string("hidden_states_145_cast_fp16")]; + tensor per_layer_slice_29_begin_0 = const()[name = 
string("per_layer_slice_29_begin_0"), val = tensor([0, 0, 6656])]; + tensor per_layer_slice_29_end_0 = const()[name = string("per_layer_slice_29_end_0"), val = tensor([1, 1, 6912])]; + tensor per_layer_slice_29_end_mask_0 = const()[name = string("per_layer_slice_29_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_29_cast_fp16 = slice_by_index(begin = per_layer_slice_29_begin_0, end = per_layer_slice_29_end_0, end_mask = per_layer_slice_29_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_29_cast_fp16")]; + tensor var_8867 = const()[name = string("op_8867"), val = tensor([0, 2, 1])]; + tensor input_419_axes_0 = const()[name = string("input_419_axes_0"), val = tensor([2])]; + tensor var_8868 = transpose(perm = var_8867, x = hidden_states_145_cast_fp16)[name = string("transpose_62")]; + tensor input_419 = expand_dims(axes = input_419_axes_0, x = var_8868)[name = string("input_419")]; + string gated_85_pad_type_0 = const()[name = string("gated_85_pad_type_0"), val = string("valid")]; + tensor gated_85_strides_0 = const()[name = string("gated_85_strides_0"), val = tensor([1, 1])]; + tensor gated_85_pad_0 = const()[name = string("gated_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_85_dilations_0 = const()[name = string("gated_85_dilations_0"), val = tensor([1, 1])]; + int32 gated_85_groups_0 = const()[name = string("gated_85_groups_0"), val = int32(1)]; + tensor gated_85 = conv(dilations = gated_85_dilations_0, groups = gated_85_groups_0, pad = gated_85_pad_0, pad_type = gated_85_pad_type_0, strides = gated_85_strides_0, weight = layers_c3_2_per_layer_input_gate_weight_palettized, x = input_419)[name = string("gated_85")]; + string gated_87_mode_0 = const()[name = string("gated_87_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_87 = gelu(mode = gated_87_mode_0, x = gated_85)[name = string("gated_87")]; + tensor var_8887 = const()[name = string("op_8887"), val = tensor([0, 2, 1])]; + tensor 
per_layer_slice_conv_29_axes_0 = const()[name = string("per_layer_slice_conv_29_axes_0"), val = tensor([2])]; + tensor var_8888_cast_fp16 = transpose(perm = var_8887, x = per_layer_slice_29_cast_fp16)[name = string("transpose_61")]; + tensor per_layer_slice_conv_29_cast_fp16 = expand_dims(axes = per_layer_slice_conv_29_axes_0, x = var_8888_cast_fp16)[name = string("per_layer_slice_conv_29_cast_fp16")]; + tensor input_421_cast_fp16 = mul(x = gated_87, y = per_layer_slice_conv_29_cast_fp16)[name = string("input_421_cast_fp16")]; + string gated_89_pad_type_0 = const()[name = string("gated_89_pad_type_0"), val = string("valid")]; + tensor gated_89_strides_0 = const()[name = string("gated_89_strides_0"), val = tensor([1, 1])]; + tensor gated_89_pad_0 = const()[name = string("gated_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_89_dilations_0 = const()[name = string("gated_89_dilations_0"), val = tensor([1, 1])]; + int32 gated_89_groups_0 = const()[name = string("gated_89_groups_0"), val = int32(1)]; + tensor layers_c3_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964127616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964455360))))[name = string("layers_c3_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_89_cast_fp16 = conv(dilations = gated_89_dilations_0, groups = gated_89_groups_0, pad = gated_89_pad_0, pad_type = gated_89_pad_type_0, strides = gated_89_strides_0, weight = layers_c3_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_421_cast_fp16)[name = string("gated_89_cast_fp16")]; + tensor var_8904_axes_0 = const()[name = string("op_8904_axes_0"), val = tensor([2])]; + tensor var_8904_cast_fp16 = squeeze(axes = var_8904_axes_0, x = gated_89_cast_fp16)[name = string("op_8904_cast_fp16")]; + tensor var_8908 = const()[name = 
string("op_8908"), val = tensor([0, 2, 1])]; + int32 var_8914 = const()[name = string("op_8914"), val = int32(-1)]; + fp16 const_162_promoted_to_fp16 = const()[name = string("const_162_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_285_cast_fp16 = transpose(perm = var_8908, x = var_8904_cast_fp16)[name = string("transpose_60")]; + tensor var_8916_cast_fp16 = mul(x = x_285_cast_fp16, y = const_162_promoted_to_fp16)[name = string("op_8916_cast_fp16")]; + bool input_423_interleave_0 = const()[name = string("input_423_interleave_0"), val = bool(false)]; + tensor input_423_cast_fp16 = concat(axis = var_8914, interleave = input_423_interleave_0, values = (x_285_cast_fp16, var_8916_cast_fp16))[name = string("input_423_cast_fp16")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_8911_to_fp16 = const()[name = string("op_8911_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_8911_to_fp16, x = input_423_cast_fp16)[name = string("normed_405_cast_fp16")]; + tensor var_8921_split_sizes_0 = const()[name = string("op_8921_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8921_axis_0 = const()[name = string("op_8921_axis_0"), val = int32(-1)]; + tensor var_8921_cast_fp16_0, tensor var_8921_cast_fp16_1 = split(axis = var_8921_axis_0, split_sizes = var_8921_split_sizes_0, x = normed_405_cast_fp16)[name = string("op_8921_cast_fp16")]; + tensor layers_c3_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964457984)))]; + tensor hidden_states_149_cast_fp16 = mul(x = var_8921_cast_fp16_0, y = layers_c3_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_149_cast_fp16")]; + tensor hidden_states_151_cast_fp16 = add(x = hidden_states_145_cast_fp16, 
y = hidden_states_149_cast_fp16)[name = string("hidden_states_151_cast_fp16")]; + tensor const_163_promoted_to_fp16 = const()[name = string("const_163_promoted_to_fp16"), val = tensor([0x1.6ep-1])]; + tensor x_287_cast_fp16 = mul(x = hidden_states_151_cast_fp16, y = const_163_promoted_to_fp16)[name = string("x_287_cast_fp16")]; + int32 var_8936 = const()[name = string("op_8936"), val = int32(-1)]; + fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8938_cast_fp16 = mul(x = x_287_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_8938_cast_fp16")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425_cast_fp16 = concat(axis = var_8936, interleave = input_425_interleave_0, values = (x_287_cast_fp16, var_8938_cast_fp16))[name = string("input_425_cast_fp16")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_8933_to_fp16 = const()[name = string("op_8933_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_8933_to_fp16, x = input_425_cast_fp16)[name = string("normed_409_cast_fp16")]; + tensor var_8943_split_sizes_0 = const()[name = string("op_8943_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_8943_axis_0 = const()[name = string("op_8943_axis_0"), val = int32(-1)]; + tensor var_8943_cast_fp16_0, tensor var_8943_cast_fp16_1 = split(axis = var_8943_axis_0, split_sizes = var_8943_split_sizes_0, x = normed_409_cast_fp16)[name = string("op_8943_cast_fp16")]; + tensor layers_c3_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964463168)))]; + tensor h_91_cast_fp16 = mul(x = var_8943_cast_fp16_0, y = 
layers_c3_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_91_cast_fp16")]; + tensor var_8949 = const()[name = string("op_8949"), val = tensor([0, 2, 1])]; + tensor var_8952_axes_0 = const()[name = string("op_8952_axes_0"), val = tensor([2])]; + tensor var_8950_cast_fp16 = transpose(perm = var_8949, x = h_91_cast_fp16)[name = string("transpose_59")]; + tensor var_8952_cast_fp16 = expand_dims(axes = var_8952_axes_0, x = var_8950_cast_fp16)[name = string("op_8952_cast_fp16")]; + string var_8968_pad_type_0 = const()[name = string("op_8968_pad_type_0"), val = string("valid")]; + tensor var_8968_strides_0 = const()[name = string("op_8968_strides_0"), val = tensor([1, 1])]; + tensor var_8968_pad_0 = const()[name = string("op_8968_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8968_dilations_0 = const()[name = string("op_8968_dilations_0"), val = tensor([1, 1])]; + int32 var_8968_groups_0 = const()[name = string("op_8968_groups_0"), val = int32(1)]; + tensor var_8968 = conv(dilations = var_8968_dilations_0, groups = var_8968_groups_0, pad = var_8968_pad_0, pad_type = var_8968_pad_type_0, strides = var_8968_strides_0, weight = layers_c3_3_self_attn_q_proj_weight_palettized, x = var_8952_cast_fp16)[name = string("op_8968")]; + tensor var_8973 = const()[name = string("op_8973"), val = tensor([1, 8, 256, 1])]; + tensor var_8974 = reshape(shape = var_8973, x = var_8968)[name = string("op_8974")]; + tensor var_8979 = const()[name = string("op_8979"), val = tensor([0, 1, 3, 2])]; + tensor var_8989 = const()[name = string("op_8989"), val = tensor([1, 8, 256])]; + tensor var_8980 = transpose(perm = var_8979, x = var_8974)[name = string("transpose_58")]; + tensor x_289 = reshape(shape = var_8989, x = var_8980)[name = string("x_289")]; + int32 var_8995 = const()[name = string("op_8995"), val = int32(-1)]; + fp16 const_165_promoted = const()[name = string("const_165_promoted"), val = fp16(-0x1p+0)]; + tensor var_8997 = mul(x = x_289, y = const_165_promoted)[name = 
string("op_8997")]; + bool input_429_interleave_0 = const()[name = string("input_429_interleave_0"), val = bool(false)]; + tensor input_429 = concat(axis = var_8995, interleave = input_429_interleave_0, values = (x_289, var_8997))[name = string("input_429")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_8992_to_fp16 = const()[name = string("op_8992_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_8992_to_fp16, x = input_429)[name = string("normed_413_cast_fp16")]; + tensor var_9002_split_sizes_0 = const()[name = string("op_9002_split_sizes_0"), val = tensor([256, 256])]; + int32 var_9002_axis_0 = const()[name = string("op_9002_axis_0"), val = int32(-1)]; + tensor var_9002_0, tensor var_9002_1 = split(axis = var_9002_axis_0, split_sizes = var_9002_split_sizes_0, x = normed_413_cast_fp16)[name = string("op_9002")]; + tensor var_9004 = mul(x = var_9002_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_9004")]; + tensor var_9009 = const()[name = string("op_9009"), val = tensor([1, 8, 1, 256])]; + tensor q_117 = reshape(shape = var_9009, x = var_9004)[name = string("q_117")]; + tensor var_9011_cast_fp16 = mul(x = q_117, y = cos_s)[name = string("op_9011_cast_fp16")]; + tensor var_9012_split_sizes_0 = const()[name = string("op_9012_split_sizes_0"), val = tensor([128, 128])]; + int32 var_9012_axis_0 = const()[name = string("op_9012_axis_0"), val = int32(-1)]; + tensor var_9012_0, tensor var_9012_1 = split(axis = var_9012_axis_0, split_sizes = var_9012_split_sizes_0, x = q_117)[name = string("op_9012")]; + fp16 const_166_promoted = const()[name = string("const_166_promoted"), val = fp16(-0x1p+0)]; + tensor var_9014 = mul(x = var_9012_1, y = const_166_promoted)[name = string("op_9014")]; + int32 var_9016 = const()[name = string("op_9016"), val = int32(-1)]; + bool var_9017_interleave_0 = const()[name = string("op_9017_interleave_0"), 
val = bool(false)]; + tensor var_9017 = concat(axis = var_9016, interleave = var_9017_interleave_0, values = (var_9014, var_9012_0))[name = string("op_9017")]; + tensor var_9018_cast_fp16 = mul(x = var_9017, y = sin_s)[name = string("op_9018_cast_fp16")]; + tensor q_119_cast_fp16 = add(x = var_9011_cast_fp16, y = var_9018_cast_fp16)[name = string("q_119_cast_fp16")]; + bool attn_weights_61_transpose_x_0 = const()[name = string("attn_weights_61_transpose_x_0"), val = bool(false)]; + bool attn_weights_61_transpose_y_0 = const()[name = string("attn_weights_61_transpose_y_0"), val = bool(false)]; + tensor attn_weights_61_cast_fp16 = matmul(transpose_x = attn_weights_61_transpose_x_0, transpose_y = attn_weights_61_transpose_y_0, x = q_119_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor x_291_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask_sliding)[name = string("x_291_cast_fp16")]; + tensor reduce_max_15_axes_0 = const()[name = string("reduce_max_15_axes_0"), val = tensor([-1])]; + bool reduce_max_15_keep_dims_0 = const()[name = string("reduce_max_15_keep_dims_0"), val = bool(true)]; + tensor reduce_max_15 = reduce_max(axes = reduce_max_15_axes_0, keep_dims = reduce_max_15_keep_dims_0, x = x_291_cast_fp16)[name = string("reduce_max_15")]; + tensor var_9050 = sub(x = x_291_cast_fp16, y = reduce_max_15)[name = string("op_9050")]; + tensor var_9056 = exp(x = var_9050)[name = string("op_9056")]; + tensor var_9066_axes_0 = const()[name = string("op_9066_axes_0"), val = tensor([-1])]; + bool var_9066_keep_dims_0 = const()[name = string("op_9066_keep_dims_0"), val = bool(true)]; + tensor var_9066 = reduce_sum(axes = var_9066_axes_0, keep_dims = var_9066_keep_dims_0, x = var_9056)[name = string("op_9066")]; + tensor var_9072_cast_fp16 = real_div(x = var_9056, y = var_9066)[name = string("op_9072_cast_fp16")]; + bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = 
bool(false)]; + bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; + tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = var_9072_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_91_cast_fp16")]; + tensor var_9083 = const()[name = string("op_9083"), val = tensor([0, 2, 1, 3])]; + tensor var_9090 = const()[name = string("op_9090"), val = tensor([1, 1, -1])]; + tensor var_9084_cast_fp16 = transpose(perm = var_9083, x = attn_output_91_cast_fp16)[name = string("transpose_57")]; + tensor attn_output_93_cast_fp16 = reshape(shape = var_9090, x = var_9084_cast_fp16)[name = string("attn_output_93_cast_fp16")]; + tensor var_9095 = const()[name = string("op_9095"), val = tensor([0, 2, 1])]; + string var_9111_pad_type_0 = const()[name = string("op_9111_pad_type_0"), val = string("valid")]; + int32 var_9111_groups_0 = const()[name = string("op_9111_groups_0"), val = int32(1)]; + tensor var_9111_strides_0 = const()[name = string("op_9111_strides_0"), val = tensor([1])]; + tensor var_9111_pad_0 = const()[name = string("op_9111_pad_0"), val = tensor([0, 0])]; + tensor var_9111_dilations_0 = const()[name = string("op_9111_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964468352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967089856))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9096_cast_fp16 = transpose(perm = var_9095, x = attn_output_93_cast_fp16)[name = string("transpose_56")]; + tensor var_9111_cast_fp16 = conv(dilations = var_9111_dilations_0, groups = var_9111_groups_0, pad = var_9111_pad_0, pad_type = var_9111_pad_type_0, strides = var_9111_strides_0, weight = 
squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_9096_cast_fp16)[name = string("op_9111_cast_fp16")]; + tensor var_9115 = const()[name = string("op_9115"), val = tensor([0, 2, 1])]; + int32 var_9121 = const()[name = string("op_9121"), val = int32(-1)]; + fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_295_cast_fp16 = transpose(perm = var_9115, x = var_9111_cast_fp16)[name = string("transpose_55")]; + tensor var_9123_cast_fp16 = mul(x = x_295_cast_fp16, y = const_167_promoted_to_fp16)[name = string("op_9123_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_9121, interleave = input_433_interleave_0, values = (x_295_cast_fp16, var_9123_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_9118_to_fp16 = const()[name = string("op_9118_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_9118_to_fp16, x = input_433_cast_fp16)[name = string("normed_417_cast_fp16")]; + tensor var_9128_split_sizes_0 = const()[name = string("op_9128_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9128_axis_0 = const()[name = string("op_9128_axis_0"), val = int32(-1)]; + tensor var_9128_cast_fp16_0, tensor var_9128_cast_fp16_1 = split(axis = var_9128_axis_0, split_sizes = var_9128_split_sizes_0, x = normed_417_cast_fp16)[name = string("op_9128_cast_fp16")]; + tensor layers_c3_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967092480)))]; + tensor attn_output_95_cast_fp16 = mul(x = var_9128_cast_fp16_0, y = 
layers_c3_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor x_297_cast_fp16 = add(x = x_287_cast_fp16, y = attn_output_95_cast_fp16)[name = string("x_297_cast_fp16")]; + int32 var_9137 = const()[name = string("op_9137"), val = int32(-1)]; + fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9139_cast_fp16 = mul(x = x_297_cast_fp16, y = const_168_promoted_to_fp16)[name = string("op_9139_cast_fp16")]; + bool input_435_interleave_0 = const()[name = string("input_435_interleave_0"), val = bool(false)]; + tensor input_435_cast_fp16 = concat(axis = var_9137, interleave = input_435_interleave_0, values = (x_297_cast_fp16, var_9139_cast_fp16))[name = string("input_435_cast_fp16")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_9134_to_fp16 = const()[name = string("op_9134_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_9134_to_fp16, x = input_435_cast_fp16)[name = string("normed_421_cast_fp16")]; + tensor var_9144_split_sizes_0 = const()[name = string("op_9144_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9144_axis_0 = const()[name = string("op_9144_axis_0"), val = int32(-1)]; + tensor var_9144_cast_fp16_0, tensor var_9144_cast_fp16_1 = split(axis = var_9144_axis_0, split_sizes = var_9144_split_sizes_0, x = normed_421_cast_fp16)[name = string("op_9144_cast_fp16")]; + tensor layers_c3_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967097664)))]; + tensor h_93_cast_fp16 = mul(x = var_9144_cast_fp16_0, y = layers_c3_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_93_cast_fp16")]; + tensor var_9155 = const()[name = 
string("op_9155"), val = tensor([0, 2, 1])]; + tensor input_437_axes_0 = const()[name = string("input_437_axes_0"), val = tensor([2])]; + tensor var_9156 = transpose(perm = var_9155, x = h_93_cast_fp16)[name = string("transpose_54")]; + tensor input_437 = expand_dims(axes = input_437_axes_0, x = var_9156)[name = string("input_437")]; + string gate_61_pad_type_0 = const()[name = string("gate_61_pad_type_0"), val = string("valid")]; + tensor gate_61_strides_0 = const()[name = string("gate_61_strides_0"), val = tensor([1, 1])]; + tensor gate_61_pad_0 = const()[name = string("gate_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_61_dilations_0 = const()[name = string("gate_61_dilations_0"), val = tensor([1, 1])]; + int32 gate_61_groups_0 = const()[name = string("gate_61_groups_0"), val = int32(1)]; + tensor gate_61 = conv(dilations = gate_61_dilations_0, groups = gate_61_groups_0, pad = gate_61_pad_0, pad_type = gate_61_pad_type_0, strides = gate_61_strides_0, weight = layers_c3_3_mlp_gate_proj_weight_palettized, x = input_437)[name = string("gate_61")]; + string up_31_pad_type_0 = const()[name = string("up_31_pad_type_0"), val = string("valid")]; + tensor up_31_strides_0 = const()[name = string("up_31_strides_0"), val = tensor([1, 1])]; + tensor up_31_pad_0 = const()[name = string("up_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_31_dilations_0 = const()[name = string("up_31_dilations_0"), val = tensor([1, 1])]; + int32 up_31_groups_0 = const()[name = string("up_31_groups_0"), val = int32(1)]; + tensor up_31 = conv(dilations = up_31_dilations_0, groups = up_31_groups_0, pad = up_31_pad_0, pad_type = up_31_pad_type_0, strides = up_31_strides_0, weight = layers_c3_3_mlp_up_proj_weight_palettized, x = input_437)[name = string("up_31")]; + string gate_63_mode_0 = const()[name = string("gate_63_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_63 = gelu(mode = gate_63_mode_0, x = gate_61)[name = string("gate_63")]; + tensor input_439 = mul(x = 
gate_63, y = up_31)[name = string("input_439")]; + string mlp_out_31_pad_type_0 = const()[name = string("mlp_out_31_pad_type_0"), val = string("valid")]; + tensor mlp_out_31_strides_0 = const()[name = string("mlp_out_31_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_31_pad_0 = const()[name = string("mlp_out_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_31_dilations_0 = const()[name = string("mlp_out_31_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_31_groups_0 = const()[name = string("mlp_out_31_groups_0"), val = int32(1)]; + tensor mlp_out_31 = conv(dilations = mlp_out_31_dilations_0, groups = mlp_out_31_groups_0, pad = mlp_out_31_pad_0, pad_type = mlp_out_31_pad_type_0, strides = mlp_out_31_strides_0, weight = layers_c3_3_mlp_down_proj_weight_palettized, x = input_439)[name = string("mlp_out_31")]; + tensor var_9196_axes_0 = const()[name = string("op_9196_axes_0"), val = tensor([2])]; + tensor var_9196 = squeeze(axes = var_9196_axes_0, x = mlp_out_31)[name = string("op_9196")]; + tensor var_9200 = const()[name = string("op_9200"), val = tensor([0, 2, 1])]; + int32 var_9206 = const()[name = string("op_9206"), val = int32(-1)]; + fp16 const_169_promoted = const()[name = string("const_169_promoted"), val = fp16(-0x1p+0)]; + tensor x_299 = transpose(perm = var_9200, x = var_9196)[name = string("transpose_53")]; + tensor var_9208 = mul(x = x_299, y = const_169_promoted)[name = string("op_9208")]; + bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; + tensor input_441 = concat(axis = var_9206, interleave = input_441_interleave_0, values = (x_299, var_9208))[name = string("input_441")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_9203_to_fp16 = const()[name = string("op_9203_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_9203_to_fp16, x = input_441)[name = 
string("normed_425_cast_fp16")]; + tensor var_9213_split_sizes_0 = const()[name = string("op_9213_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9213_axis_0 = const()[name = string("op_9213_axis_0"), val = int32(-1)]; + tensor var_9213_0, tensor var_9213_1 = split(axis = var_9213_axis_0, split_sizes = var_9213_split_sizes_0, x = normed_425_cast_fp16)[name = string("op_9213")]; + tensor hidden_states_153 = mul(x = var_9213_0, y = layers_c3_3_post_feedforward_layernorm_weight)[name = string("hidden_states_153")]; + tensor hidden_states_155_cast_fp16 = add(x = x_297_cast_fp16, y = hidden_states_153)[name = string("hidden_states_155_cast_fp16")]; + tensor per_layer_slice_31_begin_0 = const()[name = string("per_layer_slice_31_begin_0"), val = tensor([0, 0, 6912])]; + tensor per_layer_slice_31_end_0 = const()[name = string("per_layer_slice_31_end_0"), val = tensor([1, 1, 7168])]; + tensor per_layer_slice_31_end_mask_0 = const()[name = string("per_layer_slice_31_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_31_cast_fp16 = slice_by_index(begin = per_layer_slice_31_begin_0, end = per_layer_slice_31_end_0, end_mask = per_layer_slice_31_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_31_cast_fp16")]; + tensor var_9241 = const()[name = string("op_9241"), val = tensor([0, 2, 1])]; + tensor input_443_axes_0 = const()[name = string("input_443_axes_0"), val = tensor([2])]; + tensor var_9242 = transpose(perm = var_9241, x = hidden_states_155_cast_fp16)[name = string("transpose_52")]; + tensor input_443 = expand_dims(axes = input_443_axes_0, x = var_9242)[name = string("input_443")]; + string gated_91_pad_type_0 = const()[name = string("gated_91_pad_type_0"), val = string("valid")]; + tensor gated_91_strides_0 = const()[name = string("gated_91_strides_0"), val = tensor([1, 1])]; + tensor gated_91_pad_0 = const()[name = string("gated_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_91_dilations_0 = const()[name = 
string("gated_91_dilations_0"), val = tensor([1, 1])]; + int32 gated_91_groups_0 = const()[name = string("gated_91_groups_0"), val = int32(1)]; + tensor gated_91 = conv(dilations = gated_91_dilations_0, groups = gated_91_groups_0, pad = gated_91_pad_0, pad_type = gated_91_pad_type_0, strides = gated_91_strides_0, weight = layers_c3_3_per_layer_input_gate_weight_palettized, x = input_443)[name = string("gated_91")]; + string gated_93_mode_0 = const()[name = string("gated_93_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_93 = gelu(mode = gated_93_mode_0, x = gated_91)[name = string("gated_93")]; + tensor var_9261 = const()[name = string("op_9261"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_31_axes_0 = const()[name = string("per_layer_slice_conv_31_axes_0"), val = tensor([2])]; + tensor var_9262_cast_fp16 = transpose(perm = var_9261, x = per_layer_slice_31_cast_fp16)[name = string("transpose_51")]; + tensor per_layer_slice_conv_31_cast_fp16 = expand_dims(axes = per_layer_slice_conv_31_axes_0, x = var_9262_cast_fp16)[name = string("per_layer_slice_conv_31_cast_fp16")]; + tensor input_445_cast_fp16 = mul(x = gated_93, y = per_layer_slice_conv_31_cast_fp16)[name = string("input_445_cast_fp16")]; + string gated_95_pad_type_0 = const()[name = string("gated_95_pad_type_0"), val = string("valid")]; + tensor gated_95_strides_0 = const()[name = string("gated_95_strides_0"), val = tensor([1, 1])]; + tensor gated_95_pad_0 = const()[name = string("gated_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_95_dilations_0 = const()[name = string("gated_95_dilations_0"), val = tensor([1, 1])]; + int32 gated_95_groups_0 = const()[name = string("gated_95_groups_0"), val = int32(1)]; + tensor layers_c3_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967102848))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(967430592))))[name = string("layers_c3_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_95_cast_fp16 = conv(dilations = gated_95_dilations_0, groups = gated_95_groups_0, pad = gated_95_pad_0, pad_type = gated_95_pad_type_0, strides = gated_95_strides_0, weight = layers_c3_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_445_cast_fp16)[name = string("gated_95_cast_fp16")]; + tensor var_9278_axes_0 = const()[name = string("op_9278_axes_0"), val = tensor([2])]; + tensor var_9278_cast_fp16 = squeeze(axes = var_9278_axes_0, x = gated_95_cast_fp16)[name = string("op_9278_cast_fp16")]; + tensor var_9282 = const()[name = string("op_9282"), val = tensor([0, 2, 1])]; + int32 var_9288 = const()[name = string("op_9288"), val = int32(-1)]; + fp16 const_170_promoted_to_fp16 = const()[name = string("const_170_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_301_cast_fp16 = transpose(perm = var_9282, x = var_9278_cast_fp16)[name = string("transpose_50")]; + tensor var_9290_cast_fp16 = mul(x = x_301_cast_fp16, y = const_170_promoted_to_fp16)[name = string("op_9290_cast_fp16")]; + bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; + tensor input_447_cast_fp16 = concat(axis = var_9288, interleave = input_447_interleave_0, values = (x_301_cast_fp16, var_9290_cast_fp16))[name = string("input_447_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_9285_to_fp16 = const()[name = string("op_9285_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_9285_to_fp16, x = input_447_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor var_9295_split_sizes_0 = const()[name = string("op_9295_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9295_axis_0 = const()[name = string("op_9295_axis_0"), val = 
int32(-1)]; + tensor var_9295_cast_fp16_0, tensor var_9295_cast_fp16_1 = split(axis = var_9295_axis_0, split_sizes = var_9295_split_sizes_0, x = normed_429_cast_fp16)[name = string("op_9295_cast_fp16")]; + tensor layers_c3_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967433216)))]; + tensor hidden_states_159_cast_fp16 = mul(x = var_9295_cast_fp16_0, y = layers_c3_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_159_cast_fp16")]; + tensor hidden_states_161_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = hidden_states_159_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; + tensor const_171_promoted_to_fp16 = const()[name = string("const_171_promoted_to_fp16"), val = tensor([0x1.62p-1])]; + tensor x_303_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_171_promoted_to_fp16)[name = string("x_303_cast_fp16")]; + int32 var_9310 = const()[name = string("op_9310"), val = int32(-1)]; + fp16 const_172_promoted_to_fp16 = const()[name = string("const_172_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9312_cast_fp16 = mul(x = x_303_cast_fp16, y = const_172_promoted_to_fp16)[name = string("op_9312_cast_fp16")]; + bool input_449_interleave_0 = const()[name = string("input_449_interleave_0"), val = bool(false)]; + tensor input_449_cast_fp16 = concat(axis = var_9310, interleave = input_449_interleave_0, values = (x_303_cast_fp16, var_9312_cast_fp16))[name = string("input_449_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_9307_to_fp16 = const()[name = string("op_9307_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_9307_to_fp16, x = input_449_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor 
var_9317_split_sizes_0 = const()[name = string("op_9317_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9317_axis_0 = const()[name = string("op_9317_axis_0"), val = int32(-1)]; + tensor var_9317_cast_fp16_0, tensor var_9317_cast_fp16_1 = split(axis = var_9317_axis_0, split_sizes = var_9317_split_sizes_0, x = normed_433_cast_fp16)[name = string("op_9317_cast_fp16")]; + tensor layers_c3_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967438400)))]; + tensor h_97_cast_fp16 = mul(x = var_9317_cast_fp16_0, y = layers_c3_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_97_cast_fp16")]; + tensor var_9323 = const()[name = string("op_9323"), val = tensor([0, 2, 1])]; + tensor var_9326_axes_0 = const()[name = string("op_9326_axes_0"), val = tensor([2])]; + tensor var_9324_cast_fp16 = transpose(perm = var_9323, x = h_97_cast_fp16)[name = string("transpose_49")]; + tensor var_9326_cast_fp16 = expand_dims(axes = var_9326_axes_0, x = var_9324_cast_fp16)[name = string("op_9326_cast_fp16")]; + string var_9342_pad_type_0 = const()[name = string("op_9342_pad_type_0"), val = string("valid")]; + tensor var_9342_strides_0 = const()[name = string("op_9342_strides_0"), val = tensor([1, 1])]; + tensor var_9342_pad_0 = const()[name = string("op_9342_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9342_dilations_0 = const()[name = string("op_9342_dilations_0"), val = tensor([1, 1])]; + int32 var_9342_groups_0 = const()[name = string("op_9342_groups_0"), val = int32(1)]; + tensor var_9342 = conv(dilations = var_9342_dilations_0, groups = var_9342_groups_0, pad = var_9342_pad_0, pad_type = var_9342_pad_type_0, strides = var_9342_strides_0, weight = layers_c3_4_self_attn_q_proj_weight_palettized, x = var_9326_cast_fp16)[name = string("op_9342")]; + tensor var_9347 = const()[name = string("op_9347"), val = 
tensor([1, 8, 256, 1])]; + tensor var_9348 = reshape(shape = var_9347, x = var_9342)[name = string("op_9348")]; + tensor var_9353 = const()[name = string("op_9353"), val = tensor([0, 1, 3, 2])]; + tensor var_9363 = const()[name = string("op_9363"), val = tensor([1, 8, 256])]; + tensor var_9354 = transpose(perm = var_9353, x = var_9348)[name = string("transpose_48")]; + tensor x_305 = reshape(shape = var_9363, x = var_9354)[name = string("x_305")]; + int32 var_9369 = const()[name = string("op_9369"), val = int32(-1)]; + fp16 const_173_promoted = const()[name = string("const_173_promoted"), val = fp16(-0x1p+0)]; + tensor var_9371 = mul(x = x_305, y = const_173_promoted)[name = string("op_9371")]; + bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; + tensor input_453 = concat(axis = var_9369, interleave = input_453_interleave_0, values = (x_305, var_9371))[name = string("input_453")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_9366_to_fp16 = const()[name = string("op_9366_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_9366_to_fp16, x = input_453)[name = string("normed_437_cast_fp16")]; + tensor var_9376_split_sizes_0 = const()[name = string("op_9376_split_sizes_0"), val = tensor([256, 256])]; + int32 var_9376_axis_0 = const()[name = string("op_9376_axis_0"), val = int32(-1)]; + tensor var_9376_0, tensor var_9376_1 = split(axis = var_9376_axis_0, split_sizes = var_9376_split_sizes_0, x = normed_437_cast_fp16)[name = string("op_9376")]; + tensor var_9378 = mul(x = var_9376_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_9378")]; + tensor var_9383 = const()[name = string("op_9383"), val = tensor([1, 8, 1, 256])]; + tensor q_123 = reshape(shape = var_9383, x = var_9378)[name = string("q_123")]; + tensor var_9385_cast_fp16 = mul(x = q_123, y = cos_s)[name = 
string("op_9385_cast_fp16")]; + tensor var_9386_split_sizes_0 = const()[name = string("op_9386_split_sizes_0"), val = tensor([128, 128])]; + int32 var_9386_axis_0 = const()[name = string("op_9386_axis_0"), val = int32(-1)]; + tensor var_9386_0, tensor var_9386_1 = split(axis = var_9386_axis_0, split_sizes = var_9386_split_sizes_0, x = q_123)[name = string("op_9386")]; + fp16 const_174_promoted = const()[name = string("const_174_promoted"), val = fp16(-0x1p+0)]; + tensor var_9388 = mul(x = var_9386_1, y = const_174_promoted)[name = string("op_9388")]; + int32 var_9390 = const()[name = string("op_9390"), val = int32(-1)]; + bool var_9391_interleave_0 = const()[name = string("op_9391_interleave_0"), val = bool(false)]; + tensor var_9391 = concat(axis = var_9390, interleave = var_9391_interleave_0, values = (var_9388, var_9386_0))[name = string("op_9391")]; + tensor var_9392_cast_fp16 = mul(x = var_9391, y = sin_s)[name = string("op_9392_cast_fp16")]; + tensor q_125_cast_fp16 = add(x = var_9385_cast_fp16, y = var_9392_cast_fp16)[name = string("q_125_cast_fp16")]; + bool attn_weights_65_transpose_x_0 = const()[name = string("attn_weights_65_transpose_x_0"), val = bool(false)]; + bool attn_weights_65_transpose_y_0 = const()[name = string("attn_weights_65_transpose_y_0"), val = bool(false)]; + tensor attn_weights_65_cast_fp16 = matmul(transpose_x = attn_weights_65_transpose_x_0, transpose_y = attn_weights_65_transpose_y_0, x = q_125_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; + tensor x_307_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = causal_mask_sliding)[name = string("x_307_cast_fp16")]; + tensor reduce_max_16_axes_0 = const()[name = string("reduce_max_16_axes_0"), val = tensor([-1])]; + bool reduce_max_16_keep_dims_0 = const()[name = string("reduce_max_16_keep_dims_0"), val = bool(true)]; + tensor reduce_max_16 = reduce_max(axes = reduce_max_16_axes_0, keep_dims = reduce_max_16_keep_dims_0, x = x_307_cast_fp16)[name = 
string("reduce_max_16")]; + tensor var_9424 = sub(x = x_307_cast_fp16, y = reduce_max_16)[name = string("op_9424")]; + tensor var_9430 = exp(x = var_9424)[name = string("op_9430")]; + tensor var_9440_axes_0 = const()[name = string("op_9440_axes_0"), val = tensor([-1])]; + bool var_9440_keep_dims_0 = const()[name = string("op_9440_keep_dims_0"), val = bool(true)]; + tensor var_9440 = reduce_sum(axes = var_9440_axes_0, keep_dims = var_9440_keep_dims_0, x = var_9430)[name = string("op_9440")]; + tensor var_9446_cast_fp16 = real_div(x = var_9430, y = var_9440)[name = string("op_9446_cast_fp16")]; + bool attn_output_97_transpose_x_0 = const()[name = string("attn_output_97_transpose_x_0"), val = bool(false)]; + bool attn_output_97_transpose_y_0 = const()[name = string("attn_output_97_transpose_y_0"), val = bool(false)]; + tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_0, transpose_y = attn_output_97_transpose_y_0, x = var_9446_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_97_cast_fp16")]; + tensor var_9457 = const()[name = string("op_9457"), val = tensor([0, 2, 1, 3])]; + tensor var_9464 = const()[name = string("op_9464"), val = tensor([1, 1, -1])]; + tensor var_9458_cast_fp16 = transpose(perm = var_9457, x = attn_output_97_cast_fp16)[name = string("transpose_47")]; + tensor attn_output_99_cast_fp16 = reshape(shape = var_9464, x = var_9458_cast_fp16)[name = string("attn_output_99_cast_fp16")]; + tensor var_9469 = const()[name = string("op_9469"), val = tensor([0, 2, 1])]; + string var_9485_pad_type_0 = const()[name = string("op_9485_pad_type_0"), val = string("valid")]; + int32 var_9485_groups_0 = const()[name = string("op_9485_groups_0"), val = int32(1)]; + tensor var_9485_strides_0 = const()[name = string("op_9485_strides_0"), val = tensor([1])]; + tensor var_9485_pad_0 = const()[name = string("op_9485_pad_0"), val = tensor([0, 0])]; + tensor var_9485_dilations_0 = const()[name = string("op_9485_dilations_0"), val 
= tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967443584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970065088))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9470_cast_fp16 = transpose(perm = var_9469, x = attn_output_99_cast_fp16)[name = string("transpose_46")]; + tensor var_9485_cast_fp16 = conv(dilations = var_9485_dilations_0, groups = var_9485_groups_0, pad = var_9485_pad_0, pad_type = var_9485_pad_type_0, strides = var_9485_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_9470_cast_fp16)[name = string("op_9485_cast_fp16")]; + tensor var_9489 = const()[name = string("op_9489"), val = tensor([0, 2, 1])]; + int32 var_9495 = const()[name = string("op_9495"), val = int32(-1)]; + fp16 const_175_promoted_to_fp16 = const()[name = string("const_175_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_311_cast_fp16 = transpose(perm = var_9489, x = var_9485_cast_fp16)[name = string("transpose_45")]; + tensor var_9497_cast_fp16 = mul(x = x_311_cast_fp16, y = const_175_promoted_to_fp16)[name = string("op_9497_cast_fp16")]; + bool input_457_interleave_0 = const()[name = string("input_457_interleave_0"), val = bool(false)]; + tensor input_457_cast_fp16 = concat(axis = var_9495, interleave = input_457_interleave_0, values = (x_311_cast_fp16, var_9497_cast_fp16))[name = string("input_457_cast_fp16")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_9492_to_fp16 = const()[name = string("op_9492_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_9492_to_fp16, x = input_457_cast_fp16)[name = string("normed_441_cast_fp16")]; + tensor var_9502_split_sizes_0 = const()[name = string("op_9502_split_sizes_0"), val 
= tensor([2560, 2560])]; + int32 var_9502_axis_0 = const()[name = string("op_9502_axis_0"), val = int32(-1)]; + tensor var_9502_cast_fp16_0, tensor var_9502_cast_fp16_1 = split(axis = var_9502_axis_0, split_sizes = var_9502_split_sizes_0, x = normed_441_cast_fp16)[name = string("op_9502_cast_fp16")]; + tensor layers_c3_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970067712)))]; + tensor attn_output_101_cast_fp16 = mul(x = var_9502_cast_fp16_0, y = layers_c3_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_101_cast_fp16")]; + tensor x_313_cast_fp16 = add(x = x_303_cast_fp16, y = attn_output_101_cast_fp16)[name = string("x_313_cast_fp16")]; + int32 var_9511 = const()[name = string("op_9511"), val = int32(-1)]; + fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9513_cast_fp16 = mul(x = x_313_cast_fp16, y = const_176_promoted_to_fp16)[name = string("op_9513_cast_fp16")]; + bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; + tensor input_459_cast_fp16 = concat(axis = var_9511, interleave = input_459_interleave_0, values = (x_313_cast_fp16, var_9513_cast_fp16))[name = string("input_459_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_9508_to_fp16 = const()[name = string("op_9508_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_9508_to_fp16, x = input_459_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor var_9518_split_sizes_0 = const()[name = string("op_9518_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9518_axis_0 = const()[name = string("op_9518_axis_0"), val = int32(-1)]; + tensor 
var_9518_cast_fp16_0, tensor var_9518_cast_fp16_1 = split(axis = var_9518_axis_0, split_sizes = var_9518_split_sizes_0, x = normed_445_cast_fp16)[name = string("op_9518_cast_fp16")]; + tensor layers_c3_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970072896)))]; + tensor h_99_cast_fp16 = mul(x = var_9518_cast_fp16_0, y = layers_c3_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_99_cast_fp16")]; + tensor var_9529 = const()[name = string("op_9529"), val = tensor([0, 2, 1])]; + tensor input_461_axes_0 = const()[name = string("input_461_axes_0"), val = tensor([2])]; + tensor var_9530 = transpose(perm = var_9529, x = h_99_cast_fp16)[name = string("transpose_44")]; + tensor input_461 = expand_dims(axes = input_461_axes_0, x = var_9530)[name = string("input_461")]; + string gate_65_pad_type_0 = const()[name = string("gate_65_pad_type_0"), val = string("valid")]; + tensor gate_65_strides_0 = const()[name = string("gate_65_strides_0"), val = tensor([1, 1])]; + tensor gate_65_pad_0 = const()[name = string("gate_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_65_dilations_0 = const()[name = string("gate_65_dilations_0"), val = tensor([1, 1])]; + int32 gate_65_groups_0 = const()[name = string("gate_65_groups_0"), val = int32(1)]; + tensor gate_65 = conv(dilations = gate_65_dilations_0, groups = gate_65_groups_0, pad = gate_65_pad_0, pad_type = gate_65_pad_type_0, strides = gate_65_strides_0, weight = layers_c3_4_mlp_gate_proj_weight_palettized, x = input_461)[name = string("gate_65")]; + string up_33_pad_type_0 = const()[name = string("up_33_pad_type_0"), val = string("valid")]; + tensor up_33_strides_0 = const()[name = string("up_33_strides_0"), val = tensor([1, 1])]; + tensor up_33_pad_0 = const()[name = string("up_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
up_33_dilations_0 = const()[name = string("up_33_dilations_0"), val = tensor([1, 1])]; + int32 up_33_groups_0 = const()[name = string("up_33_groups_0"), val = int32(1)]; + tensor up_33 = conv(dilations = up_33_dilations_0, groups = up_33_groups_0, pad = up_33_pad_0, pad_type = up_33_pad_type_0, strides = up_33_strides_0, weight = layers_c3_4_mlp_up_proj_weight_palettized, x = input_461)[name = string("up_33")]; + string gate_67_mode_0 = const()[name = string("gate_67_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_67 = gelu(mode = gate_67_mode_0, x = gate_65)[name = string("gate_67")]; + tensor input_463 = mul(x = gate_67, y = up_33)[name = string("input_463")]; + string mlp_out_33_pad_type_0 = const()[name = string("mlp_out_33_pad_type_0"), val = string("valid")]; + tensor mlp_out_33_strides_0 = const()[name = string("mlp_out_33_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_33_pad_0 = const()[name = string("mlp_out_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_33_dilations_0 = const()[name = string("mlp_out_33_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_33_groups_0 = const()[name = string("mlp_out_33_groups_0"), val = int32(1)]; + tensor mlp_out_33 = conv(dilations = mlp_out_33_dilations_0, groups = mlp_out_33_groups_0, pad = mlp_out_33_pad_0, pad_type = mlp_out_33_pad_type_0, strides = mlp_out_33_strides_0, weight = layers_c3_4_mlp_down_proj_weight_palettized, x = input_463)[name = string("mlp_out_33")]; + tensor var_9570_axes_0 = const()[name = string("op_9570_axes_0"), val = tensor([2])]; + tensor var_9570 = squeeze(axes = var_9570_axes_0, x = mlp_out_33)[name = string("op_9570")]; + tensor var_9574 = const()[name = string("op_9574"), val = tensor([0, 2, 1])]; + int32 var_9580 = const()[name = string("op_9580"), val = int32(-1)]; + fp16 const_177_promoted = const()[name = string("const_177_promoted"), val = fp16(-0x1p+0)]; + tensor x_315 = transpose(perm = var_9574, x = var_9570)[name = string("transpose_43")]; + tensor 
var_9582 = mul(x = x_315, y = const_177_promoted)[name = string("op_9582")]; + bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; + tensor input_465 = concat(axis = var_9580, interleave = input_465_interleave_0, values = (x_315, var_9582))[name = string("input_465")]; + tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; + fp16 var_9577_to_fp16 = const()[name = string("op_9577_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_9577_to_fp16, x = input_465)[name = string("normed_449_cast_fp16")]; + tensor var_9587_split_sizes_0 = const()[name = string("op_9587_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9587_axis_0 = const()[name = string("op_9587_axis_0"), val = int32(-1)]; + tensor var_9587_0, tensor var_9587_1 = split(axis = var_9587_axis_0, split_sizes = var_9587_split_sizes_0, x = normed_449_cast_fp16)[name = string("op_9587")]; + tensor hidden_states_163 = mul(x = var_9587_0, y = layers_c3_4_post_feedforward_layernorm_weight)[name = string("hidden_states_163")]; + tensor hidden_states_165_cast_fp16 = add(x = x_313_cast_fp16, y = hidden_states_163)[name = string("hidden_states_165_cast_fp16")]; + tensor per_layer_slice_33_begin_0 = const()[name = string("per_layer_slice_33_begin_0"), val = tensor([0, 0, 7168])]; + tensor per_layer_slice_33_end_0 = const()[name = string("per_layer_slice_33_end_0"), val = tensor([1, 1, 7424])]; + tensor per_layer_slice_33_end_mask_0 = const()[name = string("per_layer_slice_33_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_33_cast_fp16 = slice_by_index(begin = per_layer_slice_33_begin_0, end = per_layer_slice_33_end_0, end_mask = per_layer_slice_33_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_33_cast_fp16")]; + tensor var_9615 = const()[name = string("op_9615"), val = tensor([0, 2, 1])]; + tensor input_467_axes_0 = 
const()[name = string("input_467_axes_0"), val = tensor([2])]; + tensor var_9616 = transpose(perm = var_9615, x = hidden_states_165_cast_fp16)[name = string("transpose_42")]; + tensor input_467 = expand_dims(axes = input_467_axes_0, x = var_9616)[name = string("input_467")]; + string gated_97_pad_type_0 = const()[name = string("gated_97_pad_type_0"), val = string("valid")]; + tensor gated_97_strides_0 = const()[name = string("gated_97_strides_0"), val = tensor([1, 1])]; + tensor gated_97_pad_0 = const()[name = string("gated_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_97_dilations_0 = const()[name = string("gated_97_dilations_0"), val = tensor([1, 1])]; + int32 gated_97_groups_0 = const()[name = string("gated_97_groups_0"), val = int32(1)]; + tensor gated_97 = conv(dilations = gated_97_dilations_0, groups = gated_97_groups_0, pad = gated_97_pad_0, pad_type = gated_97_pad_type_0, strides = gated_97_strides_0, weight = layers_c3_4_per_layer_input_gate_weight_palettized, x = input_467)[name = string("gated_97")]; + string gated_99_mode_0 = const()[name = string("gated_99_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_99 = gelu(mode = gated_99_mode_0, x = gated_97)[name = string("gated_99")]; + tensor var_9635 = const()[name = string("op_9635"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_33_axes_0 = const()[name = string("per_layer_slice_conv_33_axes_0"), val = tensor([2])]; + tensor var_9636_cast_fp16 = transpose(perm = var_9635, x = per_layer_slice_33_cast_fp16)[name = string("transpose_41")]; + tensor per_layer_slice_conv_33_cast_fp16 = expand_dims(axes = per_layer_slice_conv_33_axes_0, x = var_9636_cast_fp16)[name = string("per_layer_slice_conv_33_cast_fp16")]; + tensor input_469_cast_fp16 = mul(x = gated_99, y = per_layer_slice_conv_33_cast_fp16)[name = string("input_469_cast_fp16")]; + string gated_101_pad_type_0 = const()[name = string("gated_101_pad_type_0"), val = string("valid")]; + tensor gated_101_strides_0 = 
const()[name = string("gated_101_strides_0"), val = tensor([1, 1])]; + tensor gated_101_pad_0 = const()[name = string("gated_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_101_dilations_0 = const()[name = string("gated_101_dilations_0"), val = tensor([1, 1])]; + int32 gated_101_groups_0 = const()[name = string("gated_101_groups_0"), val = int32(1)]; + tensor layers_c3_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970078080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970405824))))[name = string("layers_c3_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_101_cast_fp16 = conv(dilations = gated_101_dilations_0, groups = gated_101_groups_0, pad = gated_101_pad_0, pad_type = gated_101_pad_type_0, strides = gated_101_strides_0, weight = layers_c3_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_469_cast_fp16)[name = string("gated_101_cast_fp16")]; + tensor var_9652_axes_0 = const()[name = string("op_9652_axes_0"), val = tensor([2])]; + tensor var_9652_cast_fp16 = squeeze(axes = var_9652_axes_0, x = gated_101_cast_fp16)[name = string("op_9652_cast_fp16")]; + tensor var_9656 = const()[name = string("op_9656"), val = tensor([0, 2, 1])]; + int32 var_9662 = const()[name = string("op_9662"), val = int32(-1)]; + fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_317_cast_fp16 = transpose(perm = var_9656, x = var_9652_cast_fp16)[name = string("transpose_40")]; + tensor var_9664_cast_fp16 = mul(x = x_317_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_9664_cast_fp16")]; + bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; + tensor input_471_cast_fp16 = concat(axis = var_9662, interleave = input_471_interleave_0, values = 
(x_317_cast_fp16, var_9664_cast_fp16))[name = string("input_471_cast_fp16")]; + tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; + fp16 var_9659_to_fp16 = const()[name = string("op_9659_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_9659_to_fp16, x = input_471_cast_fp16)[name = string("normed_453_cast_fp16")]; + tensor var_9669_split_sizes_0 = const()[name = string("op_9669_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9669_axis_0 = const()[name = string("op_9669_axis_0"), val = int32(-1)]; + tensor var_9669_cast_fp16_0, tensor var_9669_cast_fp16_1 = split(axis = var_9669_axis_0, split_sizes = var_9669_split_sizes_0, x = normed_453_cast_fp16)[name = string("op_9669_cast_fp16")]; + tensor layers_c3_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970408448)))]; + tensor hidden_states_169_cast_fp16 = mul(x = var_9669_cast_fp16_0, y = layers_c3_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_169_cast_fp16")]; + tensor hidden_states_171_cast_fp16 = add(x = hidden_states_165_cast_fp16, y = hidden_states_169_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; + tensor const_179_promoted_to_fp16 = const()[name = string("const_179_promoted_to_fp16"), val = tensor([0x1.3ap-1])]; + tensor x_319_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_179_promoted_to_fp16)[name = string("x_319_cast_fp16")]; + int32 var_9684 = const()[name = string("op_9684"), val = int32(-1)]; + fp16 const_180_promoted_to_fp16 = const()[name = string("const_180_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9686_cast_fp16 = mul(x = x_319_cast_fp16, y = const_180_promoted_to_fp16)[name = string("op_9686_cast_fp16")]; + bool 
input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473_cast_fp16 = concat(axis = var_9684, interleave = input_473_interleave_0, values = (x_319_cast_fp16, var_9686_cast_fp16))[name = string("input_473_cast_fp16")]; + tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; + fp16 var_9681_to_fp16 = const()[name = string("op_9681_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_9681_to_fp16, x = input_473_cast_fp16)[name = string("normed_457_cast_fp16")]; + tensor var_9691_split_sizes_0 = const()[name = string("op_9691_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9691_axis_0 = const()[name = string("op_9691_axis_0"), val = int32(-1)]; + tensor var_9691_cast_fp16_0, tensor var_9691_cast_fp16_1 = split(axis = var_9691_axis_0, split_sizes = var_9691_split_sizes_0, x = normed_457_cast_fp16)[name = string("op_9691_cast_fp16")]; + tensor layers_c3_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970413632)))]; + tensor h_103_cast_fp16 = mul(x = var_9691_cast_fp16_0, y = layers_c3_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_103_cast_fp16")]; + tensor var_9697 = const()[name = string("op_9697"), val = tensor([0, 2, 1])]; + tensor var_9700_axes_0 = const()[name = string("op_9700_axes_0"), val = tensor([2])]; + tensor var_9698_cast_fp16 = transpose(perm = var_9697, x = h_103_cast_fp16)[name = string("transpose_39")]; + tensor var_9700_cast_fp16 = expand_dims(axes = var_9700_axes_0, x = var_9698_cast_fp16)[name = string("op_9700_cast_fp16")]; + string var_9716_pad_type_0 = const()[name = string("op_9716_pad_type_0"), val = string("valid")]; + tensor var_9716_strides_0 = const()[name = string("op_9716_strides_0"), val = tensor([1, 
1])]; + tensor var_9716_pad_0 = const()[name = string("op_9716_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9716_dilations_0 = const()[name = string("op_9716_dilations_0"), val = tensor([1, 1])]; + int32 var_9716_groups_0 = const()[name = string("op_9716_groups_0"), val = int32(1)]; + tensor var_9716 = conv(dilations = var_9716_dilations_0, groups = var_9716_groups_0, pad = var_9716_pad_0, pad_type = var_9716_pad_type_0, strides = var_9716_strides_0, weight = layers_c3_5_self_attn_q_proj_weight_palettized, x = var_9700_cast_fp16)[name = string("op_9716")]; + tensor var_9721 = const()[name = string("op_9721"), val = tensor([1, 8, 512, 1])]; + tensor var_9722 = reshape(shape = var_9721, x = var_9716)[name = string("op_9722")]; + tensor var_9727 = const()[name = string("op_9727"), val = tensor([0, 1, 3, 2])]; + tensor var_9737 = const()[name = string("op_9737"), val = tensor([1, 8, 512])]; + tensor var_9728 = transpose(perm = var_9727, x = var_9722)[name = string("transpose_38")]; + tensor x_321 = reshape(shape = var_9737, x = var_9728)[name = string("x_321")]; + int32 var_9743 = const()[name = string("op_9743"), val = int32(-1)]; + fp16 const_181_promoted = const()[name = string("const_181_promoted"), val = fp16(-0x1p+0)]; + tensor var_9745 = mul(x = x_321, y = const_181_promoted)[name = string("op_9745")]; + bool input_477_interleave_0 = const()[name = string("input_477_interleave_0"), val = bool(false)]; + tensor input_477 = concat(axis = var_9743, interleave = input_477_interleave_0, values = (x_321, var_9745))[name = string("input_477")]; + tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; + fp16 var_9740_to_fp16 = const()[name = string("op_9740_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_9740_to_fp16, x = input_477)[name = string("normed_461_cast_fp16")]; + tensor var_9750_split_sizes_0 = const()[name = string("op_9750_split_sizes_0"), val = 
tensor([512, 512])]; + int32 var_9750_axis_0 = const()[name = string("op_9750_axis_0"), val = int32(-1)]; + tensor var_9750_0, tensor var_9750_1 = split(axis = var_9750_axis_0, split_sizes = var_9750_split_sizes_0, x = normed_461_cast_fp16)[name = string("op_9750")]; + tensor var_9752 = mul(x = var_9750_0, y = layers_c2_11_self_attn_q_norm_weight)[name = string("op_9752")]; + tensor var_9757 = const()[name = string("op_9757"), val = tensor([1, 8, 1, 512])]; + tensor q_129 = reshape(shape = var_9757, x = var_9752)[name = string("q_129")]; + tensor var_9759_cast_fp16 = mul(x = q_129, y = cos_f)[name = string("op_9759_cast_fp16")]; + tensor var_9760_split_sizes_0 = const()[name = string("op_9760_split_sizes_0"), val = tensor([256, 256])]; + int32 var_9760_axis_0 = const()[name = string("op_9760_axis_0"), val = int32(-1)]; + tensor var_9760_0, tensor var_9760_1 = split(axis = var_9760_axis_0, split_sizes = var_9760_split_sizes_0, x = q_129)[name = string("op_9760")]; + fp16 const_182_promoted = const()[name = string("const_182_promoted"), val = fp16(-0x1p+0)]; + tensor var_9762 = mul(x = var_9760_1, y = const_182_promoted)[name = string("op_9762")]; + int32 var_9764 = const()[name = string("op_9764"), val = int32(-1)]; + bool var_9765_interleave_0 = const()[name = string("op_9765_interleave_0"), val = bool(false)]; + tensor var_9765 = concat(axis = var_9764, interleave = var_9765_interleave_0, values = (var_9762, var_9760_0))[name = string("op_9765")]; + tensor var_9766_cast_fp16 = mul(x = var_9765, y = sin_f)[name = string("op_9766_cast_fp16")]; + tensor q_131_cast_fp16 = add(x = var_9759_cast_fp16, y = var_9766_cast_fp16)[name = string("q_131_cast_fp16")]; + bool attn_weights_69_transpose_x_0 = const()[name = string("attn_weights_69_transpose_x_0"), val = bool(false)]; + bool attn_weights_69_transpose_y_0 = const()[name = string("attn_weights_69_transpose_y_0"), val = bool(false)]; + tensor attn_weights_69_cast_fp16 = matmul(transpose_x = 
attn_weights_69_transpose_x_0, transpose_y = attn_weights_69_transpose_y_0, x = q_131_cast_fp16, y = transpose_95_cast_fp16)[name = string("attn_weights_69_cast_fp16")]; + tensor x_323_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask_full)[name = string("x_323_cast_fp16")]; + tensor reduce_max_17_axes_0 = const()[name = string("reduce_max_17_axes_0"), val = tensor([-1])]; + bool reduce_max_17_keep_dims_0 = const()[name = string("reduce_max_17_keep_dims_0"), val = bool(true)]; + tensor reduce_max_17 = reduce_max(axes = reduce_max_17_axes_0, keep_dims = reduce_max_17_keep_dims_0, x = x_323_cast_fp16)[name = string("reduce_max_17")]; + tensor var_9798 = sub(x = x_323_cast_fp16, y = reduce_max_17)[name = string("op_9798")]; + tensor var_9804 = exp(x = var_9798)[name = string("op_9804")]; + tensor var_9814_axes_0 = const()[name = string("op_9814_axes_0"), val = tensor([-1])]; + bool var_9814_keep_dims_0 = const()[name = string("op_9814_keep_dims_0"), val = bool(true)]; + tensor var_9814 = reduce_sum(axes = var_9814_axes_0, keep_dims = var_9814_keep_dims_0, x = var_9804)[name = string("op_9814")]; + tensor var_9820_cast_fp16 = real_div(x = var_9804, y = var_9814)[name = string("op_9820_cast_fp16")]; + bool attn_output_103_transpose_x_0 = const()[name = string("attn_output_103_transpose_x_0"), val = bool(false)]; + bool attn_output_103_transpose_y_0 = const()[name = string("attn_output_103_transpose_y_0"), val = bool(false)]; + tensor attn_output_103_cast_fp16 = matmul(transpose_x = attn_output_103_transpose_x_0, transpose_y = attn_output_103_transpose_y_0, x = var_9820_cast_fp16, y = V_expanded_23_cast_fp16)[name = string("attn_output_103_cast_fp16")]; + tensor var_9831 = const()[name = string("op_9831"), val = tensor([0, 2, 1, 3])]; + tensor var_9838 = const()[name = string("op_9838"), val = tensor([1, 1, -1])]; + tensor var_9832_cast_fp16 = transpose(perm = var_9831, x = attn_output_103_cast_fp16)[name = string("transpose_37")]; + tensor 
attn_output_105_cast_fp16 = reshape(shape = var_9838, x = var_9832_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_9843 = const()[name = string("op_9843"), val = tensor([0, 2, 1])]; + string var_9859_pad_type_0 = const()[name = string("op_9859_pad_type_0"), val = string("valid")]; + int32 var_9859_groups_0 = const()[name = string("op_9859_groups_0"), val = int32(1)]; + tensor var_9859_strides_0 = const()[name = string("op_9859_strides_0"), val = tensor([1])]; + tensor var_9859_pad_0 = const()[name = string("op_9859_pad_0"), val = tensor([0, 0])]; + tensor var_9859_dilations_0 = const()[name = string("op_9859_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970418816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975661760))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9844_cast_fp16 = transpose(perm = var_9843, x = attn_output_105_cast_fp16)[name = string("transpose_36")]; + tensor var_9859_cast_fp16 = conv(dilations = var_9859_dilations_0, groups = var_9859_groups_0, pad = var_9859_pad_0, pad_type = var_9859_pad_type_0, strides = var_9859_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_9844_cast_fp16)[name = string("op_9859_cast_fp16")]; + tensor var_9863 = const()[name = string("op_9863"), val = tensor([0, 2, 1])]; + int32 var_9869 = const()[name = string("op_9869"), val = int32(-1)]; + fp16 const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_327_cast_fp16 = transpose(perm = var_9863, x = var_9859_cast_fp16)[name = string("transpose_35")]; + tensor var_9871_cast_fp16 = mul(x = x_327_cast_fp16, y = const_183_promoted_to_fp16)[name = string("op_9871_cast_fp16")]; + bool input_481_interleave_0 = const()[name = 
string("input_481_interleave_0"), val = bool(false)]; + tensor input_481_cast_fp16 = concat(axis = var_9869, interleave = input_481_interleave_0, values = (x_327_cast_fp16, var_9871_cast_fp16))[name = string("input_481_cast_fp16")]; + tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; + fp16 var_9866_to_fp16 = const()[name = string("op_9866_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_9866_to_fp16, x = input_481_cast_fp16)[name = string("normed_465_cast_fp16")]; + tensor var_9876_split_sizes_0 = const()[name = string("op_9876_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9876_axis_0 = const()[name = string("op_9876_axis_0"), val = int32(-1)]; + tensor var_9876_cast_fp16_0, tensor var_9876_cast_fp16_1 = split(axis = var_9876_axis_0, split_sizes = var_9876_split_sizes_0, x = normed_465_cast_fp16)[name = string("op_9876_cast_fp16")]; + tensor layers_c3_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975664384)))]; + tensor attn_output_107_cast_fp16 = mul(x = var_9876_cast_fp16_0, y = layers_c3_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_107_cast_fp16")]; + tensor x_329_cast_fp16 = add(x = x_319_cast_fp16, y = attn_output_107_cast_fp16)[name = string("x_329_cast_fp16")]; + int32 var_9885 = const()[name = string("op_9885"), val = int32(-1)]; + fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9887_cast_fp16 = mul(x = x_329_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_9887_cast_fp16")]; + bool input_483_interleave_0 = const()[name = string("input_483_interleave_0"), val = bool(false)]; + tensor input_483_cast_fp16 = concat(axis = var_9885, interleave 
= input_483_interleave_0, values = (x_329_cast_fp16, var_9887_cast_fp16))[name = string("input_483_cast_fp16")]; + tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; + fp16 var_9882_to_fp16 = const()[name = string("op_9882_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_9882_to_fp16, x = input_483_cast_fp16)[name = string("normed_469_cast_fp16")]; + tensor var_9892_split_sizes_0 = const()[name = string("op_9892_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9892_axis_0 = const()[name = string("op_9892_axis_0"), val = int32(-1)]; + tensor var_9892_cast_fp16_0, tensor var_9892_cast_fp16_1 = split(axis = var_9892_axis_0, split_sizes = var_9892_split_sizes_0, x = normed_469_cast_fp16)[name = string("op_9892_cast_fp16")]; + tensor layers_c3_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975669568)))]; + tensor h_105_cast_fp16 = mul(x = var_9892_cast_fp16_0, y = layers_c3_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_105_cast_fp16")]; + tensor var_9903 = const()[name = string("op_9903"), val = tensor([0, 2, 1])]; + tensor input_485_axes_0 = const()[name = string("input_485_axes_0"), val = tensor([2])]; + tensor var_9904 = transpose(perm = var_9903, x = h_105_cast_fp16)[name = string("transpose_34")]; + tensor input_485 = expand_dims(axes = input_485_axes_0, x = var_9904)[name = string("input_485")]; + string gate_69_pad_type_0 = const()[name = string("gate_69_pad_type_0"), val = string("valid")]; + tensor gate_69_strides_0 = const()[name = string("gate_69_strides_0"), val = tensor([1, 1])]; + tensor gate_69_pad_0 = const()[name = string("gate_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_69_dilations_0 = const()[name = 
string("gate_69_dilations_0"), val = tensor([1, 1])]; + int32 gate_69_groups_0 = const()[name = string("gate_69_groups_0"), val = int32(1)]; + tensor gate_69 = conv(dilations = gate_69_dilations_0, groups = gate_69_groups_0, pad = gate_69_pad_0, pad_type = gate_69_pad_type_0, strides = gate_69_strides_0, weight = layers_c3_5_mlp_gate_proj_weight_palettized, x = input_485)[name = string("gate_69")]; + string up_35_pad_type_0 = const()[name = string("up_35_pad_type_0"), val = string("valid")]; + tensor up_35_strides_0 = const()[name = string("up_35_strides_0"), val = tensor([1, 1])]; + tensor up_35_pad_0 = const()[name = string("up_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_35_dilations_0 = const()[name = string("up_35_dilations_0"), val = tensor([1, 1])]; + int32 up_35_groups_0 = const()[name = string("up_35_groups_0"), val = int32(1)]; + tensor up_35 = conv(dilations = up_35_dilations_0, groups = up_35_groups_0, pad = up_35_pad_0, pad_type = up_35_pad_type_0, strides = up_35_strides_0, weight = layers_c3_5_mlp_up_proj_weight_palettized, x = input_485)[name = string("up_35")]; + string gate_71_mode_0 = const()[name = string("gate_71_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_71 = gelu(mode = gate_71_mode_0, x = gate_69)[name = string("gate_71")]; + tensor input_487 = mul(x = gate_71, y = up_35)[name = string("input_487")]; + string mlp_out_35_pad_type_0 = const()[name = string("mlp_out_35_pad_type_0"), val = string("valid")]; + tensor mlp_out_35_strides_0 = const()[name = string("mlp_out_35_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_35_pad_0 = const()[name = string("mlp_out_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_35_dilations_0 = const()[name = string("mlp_out_35_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_35_groups_0 = const()[name = string("mlp_out_35_groups_0"), val = int32(1)]; + tensor mlp_out_35 = conv(dilations = mlp_out_35_dilations_0, groups = mlp_out_35_groups_0, pad = mlp_out_35_pad_0, 
pad_type = mlp_out_35_pad_type_0, strides = mlp_out_35_strides_0, weight = layers_c3_5_mlp_down_proj_weight_palettized, x = input_487)[name = string("mlp_out_35")]; + tensor var_9944_axes_0 = const()[name = string("op_9944_axes_0"), val = tensor([2])]; + tensor var_9944 = squeeze(axes = var_9944_axes_0, x = mlp_out_35)[name = string("op_9944")]; + tensor var_9948 = const()[name = string("op_9948"), val = tensor([0, 2, 1])]; + int32 var_9954 = const()[name = string("op_9954"), val = int32(-1)]; + fp16 const_185_promoted = const()[name = string("const_185_promoted"), val = fp16(-0x1p+0)]; + tensor x_331 = transpose(perm = var_9948, x = var_9944)[name = string("transpose_33")]; + tensor var_9956 = mul(x = x_331, y = const_185_promoted)[name = string("op_9956")]; + bool input_489_interleave_0 = const()[name = string("input_489_interleave_0"), val = bool(false)]; + tensor input_489 = concat(axis = var_9954, interleave = input_489_interleave_0, values = (x_331, var_9956))[name = string("input_489")]; + tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; + fp16 var_9951_to_fp16 = const()[name = string("op_9951_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_9951_to_fp16, x = input_489)[name = string("normed_473_cast_fp16")]; + tensor var_9961_split_sizes_0 = const()[name = string("op_9961_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_9961_axis_0 = const()[name = string("op_9961_axis_0"), val = int32(-1)]; + tensor var_9961_0, tensor var_9961_1 = split(axis = var_9961_axis_0, split_sizes = var_9961_split_sizes_0, x = normed_473_cast_fp16)[name = string("op_9961")]; + tensor hidden_states_173 = mul(x = var_9961_0, y = layers_c3_5_post_feedforward_layernorm_weight)[name = string("hidden_states_173")]; + tensor hidden_states_175_cast_fp16 = add(x = x_329_cast_fp16, y = hidden_states_173)[name = string("hidden_states_175_cast_fp16")]; + tensor 
per_layer_slice_35_begin_0 = const()[name = string("per_layer_slice_35_begin_0"), val = tensor([0, 0, 7424])]; + tensor per_layer_slice_35_end_0 = const()[name = string("per_layer_slice_35_end_0"), val = tensor([1, 1, 7680])]; + tensor per_layer_slice_35_end_mask_0 = const()[name = string("per_layer_slice_35_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_35_cast_fp16 = slice_by_index(begin = per_layer_slice_35_begin_0, end = per_layer_slice_35_end_0, end_mask = per_layer_slice_35_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_35_cast_fp16")]; + tensor var_9989 = const()[name = string("op_9989"), val = tensor([0, 2, 1])]; + tensor input_491_axes_0 = const()[name = string("input_491_axes_0"), val = tensor([2])]; + tensor var_9990 = transpose(perm = var_9989, x = hidden_states_175_cast_fp16)[name = string("transpose_32")]; + tensor input_491 = expand_dims(axes = input_491_axes_0, x = var_9990)[name = string("input_491")]; + string gated_103_pad_type_0 = const()[name = string("gated_103_pad_type_0"), val = string("valid")]; + tensor gated_103_strides_0 = const()[name = string("gated_103_strides_0"), val = tensor([1, 1])]; + tensor gated_103_pad_0 = const()[name = string("gated_103_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_103_dilations_0 = const()[name = string("gated_103_dilations_0"), val = tensor([1, 1])]; + int32 gated_103_groups_0 = const()[name = string("gated_103_groups_0"), val = int32(1)]; + tensor gated_103 = conv(dilations = gated_103_dilations_0, groups = gated_103_groups_0, pad = gated_103_pad_0, pad_type = gated_103_pad_type_0, strides = gated_103_strides_0, weight = layers_c3_5_per_layer_input_gate_weight_palettized, x = input_491)[name = string("gated_103")]; + string gated_105_mode_0 = const()[name = string("gated_105_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_105 = gelu(mode = gated_105_mode_0, x = gated_103)[name = string("gated_105")]; + tensor var_10009 = const()[name = 
string("op_10009"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_35_axes_0 = const()[name = string("per_layer_slice_conv_35_axes_0"), val = tensor([2])]; + tensor var_10010_cast_fp16 = transpose(perm = var_10009, x = per_layer_slice_35_cast_fp16)[name = string("transpose_31")]; + tensor per_layer_slice_conv_35_cast_fp16 = expand_dims(axes = per_layer_slice_conv_35_axes_0, x = var_10010_cast_fp16)[name = string("per_layer_slice_conv_35_cast_fp16")]; + tensor input_493_cast_fp16 = mul(x = gated_105, y = per_layer_slice_conv_35_cast_fp16)[name = string("input_493_cast_fp16")]; + string gated_107_pad_type_0 = const()[name = string("gated_107_pad_type_0"), val = string("valid")]; + tensor gated_107_strides_0 = const()[name = string("gated_107_strides_0"), val = tensor([1, 1])]; + tensor gated_107_pad_0 = const()[name = string("gated_107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_107_dilations_0 = const()[name = string("gated_107_dilations_0"), val = tensor([1, 1])]; + int32 gated_107_groups_0 = const()[name = string("gated_107_groups_0"), val = int32(1)]; + tensor layers_c3_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975674752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(976002496))))[name = string("layers_c3_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_107_cast_fp16 = conv(dilations = gated_107_dilations_0, groups = gated_107_groups_0, pad = gated_107_pad_0, pad_type = gated_107_pad_type_0, strides = gated_107_strides_0, weight = layers_c3_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_493_cast_fp16)[name = string("gated_107_cast_fp16")]; + tensor var_10026_axes_0 = const()[name = string("op_10026_axes_0"), val = tensor([2])]; + tensor var_10026_cast_fp16 = squeeze(axes = var_10026_axes_0, x = gated_107_cast_fp16)[name = 
string("op_10026_cast_fp16")]; + tensor var_10030 = const()[name = string("op_10030"), val = tensor([0, 2, 1])]; + int32 var_10036 = const()[name = string("op_10036"), val = int32(-1)]; + fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_333_cast_fp16 = transpose(perm = var_10030, x = var_10026_cast_fp16)[name = string("transpose_30")]; + tensor var_10038_cast_fp16 = mul(x = x_333_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_10038_cast_fp16")]; + bool input_495_interleave_0 = const()[name = string("input_495_interleave_0"), val = bool(false)]; + tensor input_495_cast_fp16 = concat(axis = var_10036, interleave = input_495_interleave_0, values = (x_333_cast_fp16, var_10038_cast_fp16))[name = string("input_495_cast_fp16")]; + tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; + fp16 var_10033_to_fp16 = const()[name = string("op_10033_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_10033_to_fp16, x = input_495_cast_fp16)[name = string("normed_477_cast_fp16")]; + tensor var_10043_split_sizes_0 = const()[name = string("op_10043_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10043_axis_0 = const()[name = string("op_10043_axis_0"), val = int32(-1)]; + tensor var_10043_cast_fp16_0, tensor var_10043_cast_fp16_1 = split(axis = var_10043_axis_0, split_sizes = var_10043_split_sizes_0, x = normed_477_cast_fp16)[name = string("op_10043_cast_fp16")]; + tensor layers_c3_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(976005120)))]; + tensor hidden_states_179_cast_fp16 = mul(x = var_10043_cast_fp16_0, y = layers_c3_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = 
string("hidden_states_179_cast_fp16")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = hidden_states_179_cast_fp16)[name = string("hidden_states_181_cast_fp16")]; + tensor const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = tensor([0x1.aep-2])]; + tensor x_335_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_187_promoted_to_fp16)[name = string("x_335_cast_fp16")]; + int32 var_10058 = const()[name = string("op_10058"), val = int32(-1)]; + fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10060_cast_fp16 = mul(x = x_335_cast_fp16, y = const_188_promoted_to_fp16)[name = string("op_10060_cast_fp16")]; + bool input_497_interleave_0 = const()[name = string("input_497_interleave_0"), val = bool(false)]; + tensor input_497_cast_fp16 = concat(axis = var_10058, interleave = input_497_interleave_0, values = (x_335_cast_fp16, var_10060_cast_fp16))[name = string("input_497_cast_fp16")]; + tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; + fp16 var_10055_to_fp16 = const()[name = string("op_10055_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_10055_to_fp16, x = input_497_cast_fp16)[name = string("normed_481_cast_fp16")]; + tensor var_10065_split_sizes_0 = const()[name = string("op_10065_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10065_axis_0 = const()[name = string("op_10065_axis_0"), val = int32(-1)]; + tensor var_10065_cast_fp16_0, tensor var_10065_cast_fp16_1 = split(axis = var_10065_axis_0, split_sizes = var_10065_split_sizes_0, x = normed_481_cast_fp16)[name = string("op_10065_cast_fp16")]; + tensor layers_c3_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(976010304)))]; + tensor h_109_cast_fp16 = mul(x = var_10065_cast_fp16_0, y = layers_c3_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_109_cast_fp16")]; + tensor var_10071 = const()[name = string("op_10071"), val = tensor([0, 2, 1])]; + tensor var_10074_axes_0 = const()[name = string("op_10074_axes_0"), val = tensor([2])]; + tensor var_10072_cast_fp16 = transpose(perm = var_10071, x = h_109_cast_fp16)[name = string("transpose_29")]; + tensor var_10074_cast_fp16 = expand_dims(axes = var_10074_axes_0, x = var_10072_cast_fp16)[name = string("op_10074_cast_fp16")]; + string var_10090_pad_type_0 = const()[name = string("op_10090_pad_type_0"), val = string("valid")]; + tensor var_10090_strides_0 = const()[name = string("op_10090_strides_0"), val = tensor([1, 1])]; + tensor var_10090_pad_0 = const()[name = string("op_10090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10090_dilations_0 = const()[name = string("op_10090_dilations_0"), val = tensor([1, 1])]; + int32 var_10090_groups_0 = const()[name = string("op_10090_groups_0"), val = int32(1)]; + tensor var_10090 = conv(dilations = var_10090_dilations_0, groups = var_10090_groups_0, pad = var_10090_pad_0, pad_type = var_10090_pad_type_0, strides = var_10090_strides_0, weight = layers_c3_6_self_attn_q_proj_weight_palettized, x = var_10074_cast_fp16)[name = string("op_10090")]; + tensor var_10095 = const()[name = string("op_10095"), val = tensor([1, 8, 256, 1])]; + tensor var_10096 = reshape(shape = var_10095, x = var_10090)[name = string("op_10096")]; + tensor var_10101 = const()[name = string("op_10101"), val = tensor([0, 1, 3, 2])]; + tensor var_10111 = const()[name = string("op_10111"), val = tensor([1, 8, 256])]; + tensor var_10102 = transpose(perm = var_10101, x = var_10096)[name = string("transpose_28")]; + tensor x_337 = reshape(shape = var_10111, x = var_10102)[name = string("x_337")]; + int32 var_10117 = const()[name = string("op_10117"), val = int32(-1)]; + fp16 const_189_promoted 
= const()[name = string("const_189_promoted"), val = fp16(-0x1p+0)]; + tensor var_10119 = mul(x = x_337, y = const_189_promoted)[name = string("op_10119")]; + bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; + tensor input_501 = concat(axis = var_10117, interleave = input_501_interleave_0, values = (x_337, var_10119))[name = string("input_501")]; + tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; + fp16 var_10114_to_fp16 = const()[name = string("op_10114_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_10114_to_fp16, x = input_501)[name = string("normed_485_cast_fp16")]; + tensor var_10124_split_sizes_0 = const()[name = string("op_10124_split_sizes_0"), val = tensor([256, 256])]; + int32 var_10124_axis_0 = const()[name = string("op_10124_axis_0"), val = int32(-1)]; + tensor var_10124_0, tensor var_10124_1 = split(axis = var_10124_axis_0, split_sizes = var_10124_split_sizes_0, x = normed_485_cast_fp16)[name = string("op_10124")]; + tensor var_10126 = mul(x = var_10124_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_10126")]; + tensor var_10131 = const()[name = string("op_10131"), val = tensor([1, 8, 1, 256])]; + tensor q_135 = reshape(shape = var_10131, x = var_10126)[name = string("q_135")]; + tensor var_10133_cast_fp16 = mul(x = q_135, y = cos_s)[name = string("op_10133_cast_fp16")]; + tensor var_10134_split_sizes_0 = const()[name = string("op_10134_split_sizes_0"), val = tensor([128, 128])]; + int32 var_10134_axis_0 = const()[name = string("op_10134_axis_0"), val = int32(-1)]; + tensor var_10134_0, tensor var_10134_1 = split(axis = var_10134_axis_0, split_sizes = var_10134_split_sizes_0, x = q_135)[name = string("op_10134")]; + fp16 const_190_promoted = const()[name = string("const_190_promoted"), val = fp16(-0x1p+0)]; + tensor var_10136 = mul(x = var_10134_1, y = const_190_promoted)[name 
= string("op_10136")]; + int32 var_10138 = const()[name = string("op_10138"), val = int32(-1)]; + bool var_10139_interleave_0 = const()[name = string("op_10139_interleave_0"), val = bool(false)]; + tensor var_10139 = concat(axis = var_10138, interleave = var_10139_interleave_0, values = (var_10136, var_10134_0))[name = string("op_10139")]; + tensor var_10140_cast_fp16 = mul(x = var_10139, y = sin_s)[name = string("op_10140_cast_fp16")]; + tensor q_137_cast_fp16 = add(x = var_10133_cast_fp16, y = var_10140_cast_fp16)[name = string("q_137_cast_fp16")]; + bool attn_weights_73_transpose_x_0 = const()[name = string("attn_weights_73_transpose_x_0"), val = bool(false)]; + bool attn_weights_73_transpose_y_0 = const()[name = string("attn_weights_73_transpose_y_0"), val = bool(false)]; + tensor attn_weights_73_cast_fp16 = matmul(transpose_x = attn_weights_73_transpose_x_0, transpose_y = attn_weights_73_transpose_y_0, x = q_137_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor x_339_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask_sliding)[name = string("x_339_cast_fp16")]; + tensor reduce_max_18_axes_0 = const()[name = string("reduce_max_18_axes_0"), val = tensor([-1])]; + bool reduce_max_18_keep_dims_0 = const()[name = string("reduce_max_18_keep_dims_0"), val = bool(true)]; + tensor reduce_max_18 = reduce_max(axes = reduce_max_18_axes_0, keep_dims = reduce_max_18_keep_dims_0, x = x_339_cast_fp16)[name = string("reduce_max_18")]; + tensor var_10172 = sub(x = x_339_cast_fp16, y = reduce_max_18)[name = string("op_10172")]; + tensor var_10178 = exp(x = var_10172)[name = string("op_10178")]; + tensor var_10188_axes_0 = const()[name = string("op_10188_axes_0"), val = tensor([-1])]; + bool var_10188_keep_dims_0 = const()[name = string("op_10188_keep_dims_0"), val = bool(true)]; + tensor var_10188 = reduce_sum(axes = var_10188_axes_0, keep_dims = var_10188_keep_dims_0, x = var_10178)[name = string("op_10188")]; + tensor 
var_10194_cast_fp16 = real_div(x = var_10178, y = var_10188)[name = string("op_10194_cast_fp16")]; + bool attn_output_109_transpose_x_0 = const()[name = string("attn_output_109_transpose_x_0"), val = bool(false)]; + bool attn_output_109_transpose_y_0 = const()[name = string("attn_output_109_transpose_y_0"), val = bool(false)]; + tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_0, transpose_y = attn_output_109_transpose_y_0, x = var_10194_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor var_10205 = const()[name = string("op_10205"), val = tensor([0, 2, 1, 3])]; + tensor var_10212 = const()[name = string("op_10212"), val = tensor([1, 1, -1])]; + tensor var_10206_cast_fp16 = transpose(perm = var_10205, x = attn_output_109_cast_fp16)[name = string("transpose_27")]; + tensor attn_output_111_cast_fp16 = reshape(shape = var_10212, x = var_10206_cast_fp16)[name = string("attn_output_111_cast_fp16")]; + tensor var_10217 = const()[name = string("op_10217"), val = tensor([0, 2, 1])]; + string var_10233_pad_type_0 = const()[name = string("op_10233_pad_type_0"), val = string("valid")]; + int32 var_10233_groups_0 = const()[name = string("op_10233_groups_0"), val = int32(1)]; + tensor var_10233_strides_0 = const()[name = string("op_10233_strides_0"), val = tensor([1])]; + tensor var_10233_pad_0 = const()[name = string("op_10233_pad_0"), val = tensor([0, 0])]; + tensor var_10233_dilations_0 = const()[name = string("op_10233_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(976015488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978636992))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10218_cast_fp16 = transpose(perm = var_10217, x = attn_output_111_cast_fp16)[name = 
string("transpose_26")]; + tensor var_10233_cast_fp16 = conv(dilations = var_10233_dilations_0, groups = var_10233_groups_0, pad = var_10233_pad_0, pad_type = var_10233_pad_type_0, strides = var_10233_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_10218_cast_fp16)[name = string("op_10233_cast_fp16")]; + tensor var_10237 = const()[name = string("op_10237"), val = tensor([0, 2, 1])]; + int32 var_10243 = const()[name = string("op_10243"), val = int32(-1)]; + fp16 const_191_promoted_to_fp16 = const()[name = string("const_191_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_343_cast_fp16 = transpose(perm = var_10237, x = var_10233_cast_fp16)[name = string("transpose_25")]; + tensor var_10245_cast_fp16 = mul(x = x_343_cast_fp16, y = const_191_promoted_to_fp16)[name = string("op_10245_cast_fp16")]; + bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; + tensor input_505_cast_fp16 = concat(axis = var_10243, interleave = input_505_interleave_0, values = (x_343_cast_fp16, var_10245_cast_fp16))[name = string("input_505_cast_fp16")]; + tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; + fp16 var_10240_to_fp16 = const()[name = string("op_10240_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_10240_to_fp16, x = input_505_cast_fp16)[name = string("normed_489_cast_fp16")]; + tensor var_10250_split_sizes_0 = const()[name = string("op_10250_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10250_axis_0 = const()[name = string("op_10250_axis_0"), val = int32(-1)]; + tensor var_10250_cast_fp16_0, tensor var_10250_cast_fp16_1 = split(axis = var_10250_axis_0, split_sizes = var_10250_split_sizes_0, x = normed_489_cast_fp16)[name = string("op_10250_cast_fp16")]; + tensor layers_c3_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_c3_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978639616)))]; + tensor attn_output_113_cast_fp16 = mul(x = var_10250_cast_fp16_0, y = layers_c3_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_113_cast_fp16")]; + tensor x_345_cast_fp16 = add(x = x_335_cast_fp16, y = attn_output_113_cast_fp16)[name = string("x_345_cast_fp16")]; + int32 var_10259 = const()[name = string("op_10259"), val = int32(-1)]; + fp16 const_192_promoted_to_fp16 = const()[name = string("const_192_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10261_cast_fp16 = mul(x = x_345_cast_fp16, y = const_192_promoted_to_fp16)[name = string("op_10261_cast_fp16")]; + bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; + tensor input_507_cast_fp16 = concat(axis = var_10259, interleave = input_507_interleave_0, values = (x_345_cast_fp16, var_10261_cast_fp16))[name = string("input_507_cast_fp16")]; + tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; + fp16 var_10256_to_fp16 = const()[name = string("op_10256_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_10256_to_fp16, x = input_507_cast_fp16)[name = string("normed_493_cast_fp16")]; + tensor var_10266_split_sizes_0 = const()[name = string("op_10266_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10266_axis_0 = const()[name = string("op_10266_axis_0"), val = int32(-1)]; + tensor var_10266_cast_fp16_0, tensor var_10266_cast_fp16_1 = split(axis = var_10266_axis_0, split_sizes = var_10266_split_sizes_0, x = normed_493_cast_fp16)[name = string("op_10266_cast_fp16")]; + tensor layers_c3_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path 
= string("@model_path/weights/weight.bin"), offset = uint64(978644800)))]; + tensor h_111_cast_fp16 = mul(x = var_10266_cast_fp16_0, y = layers_c3_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_111_cast_fp16")]; + tensor var_10277 = const()[name = string("op_10277"), val = tensor([0, 2, 1])]; + tensor input_509_axes_0 = const()[name = string("input_509_axes_0"), val = tensor([2])]; + tensor var_10278 = transpose(perm = var_10277, x = h_111_cast_fp16)[name = string("transpose_24")]; + tensor input_509 = expand_dims(axes = input_509_axes_0, x = var_10278)[name = string("input_509")]; + string gate_73_pad_type_0 = const()[name = string("gate_73_pad_type_0"), val = string("valid")]; + tensor gate_73_strides_0 = const()[name = string("gate_73_strides_0"), val = tensor([1, 1])]; + tensor gate_73_pad_0 = const()[name = string("gate_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_73_dilations_0 = const()[name = string("gate_73_dilations_0"), val = tensor([1, 1])]; + int32 gate_73_groups_0 = const()[name = string("gate_73_groups_0"), val = int32(1)]; + tensor gate_73 = conv(dilations = gate_73_dilations_0, groups = gate_73_groups_0, pad = gate_73_pad_0, pad_type = gate_73_pad_type_0, strides = gate_73_strides_0, weight = layers_c3_6_mlp_gate_proj_weight_palettized, x = input_509)[name = string("gate_73")]; + string up_37_pad_type_0 = const()[name = string("up_37_pad_type_0"), val = string("valid")]; + tensor up_37_strides_0 = const()[name = string("up_37_strides_0"), val = tensor([1, 1])]; + tensor up_37_pad_0 = const()[name = string("up_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_37_dilations_0 = const()[name = string("up_37_dilations_0"), val = tensor([1, 1])]; + int32 up_37_groups_0 = const()[name = string("up_37_groups_0"), val = int32(1)]; + tensor up_37 = conv(dilations = up_37_dilations_0, groups = up_37_groups_0, pad = up_37_pad_0, pad_type = up_37_pad_type_0, strides = up_37_strides_0, weight = 
layers_c3_6_mlp_up_proj_weight_palettized, x = input_509)[name = string("up_37")]; + string gate_75_mode_0 = const()[name = string("gate_75_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_75 = gelu(mode = gate_75_mode_0, x = gate_73)[name = string("gate_75")]; + tensor input_511 = mul(x = gate_75, y = up_37)[name = string("input_511")]; + string mlp_out_37_pad_type_0 = const()[name = string("mlp_out_37_pad_type_0"), val = string("valid")]; + tensor mlp_out_37_strides_0 = const()[name = string("mlp_out_37_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_37_pad_0 = const()[name = string("mlp_out_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_37_dilations_0 = const()[name = string("mlp_out_37_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_37_groups_0 = const()[name = string("mlp_out_37_groups_0"), val = int32(1)]; + tensor mlp_out_37 = conv(dilations = mlp_out_37_dilations_0, groups = mlp_out_37_groups_0, pad = mlp_out_37_pad_0, pad_type = mlp_out_37_pad_type_0, strides = mlp_out_37_strides_0, weight = layers_c3_6_mlp_down_proj_weight_palettized, x = input_511)[name = string("mlp_out_37")]; + tensor var_10318_axes_0 = const()[name = string("op_10318_axes_0"), val = tensor([2])]; + tensor var_10318 = squeeze(axes = var_10318_axes_0, x = mlp_out_37)[name = string("op_10318")]; + tensor var_10322 = const()[name = string("op_10322"), val = tensor([0, 2, 1])]; + int32 var_10328 = const()[name = string("op_10328"), val = int32(-1)]; + fp16 const_193_promoted = const()[name = string("const_193_promoted"), val = fp16(-0x1p+0)]; + tensor x_347 = transpose(perm = var_10322, x = var_10318)[name = string("transpose_23")]; + tensor var_10330 = mul(x = x_347, y = const_193_promoted)[name = string("op_10330")]; + bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; + tensor input_513 = concat(axis = var_10328, interleave = input_513_interleave_0, values = (x_347, var_10330))[name = string("input_513")]; 
+ tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; + fp16 var_10325_to_fp16 = const()[name = string("op_10325_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_10325_to_fp16, x = input_513)[name = string("normed_497_cast_fp16")]; + tensor var_10335_split_sizes_0 = const()[name = string("op_10335_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10335_axis_0 = const()[name = string("op_10335_axis_0"), val = int32(-1)]; + tensor var_10335_0, tensor var_10335_1 = split(axis = var_10335_axis_0, split_sizes = var_10335_split_sizes_0, x = normed_497_cast_fp16)[name = string("op_10335")]; + tensor hidden_states_183 = mul(x = var_10335_0, y = layers_c3_6_post_feedforward_layernorm_weight)[name = string("hidden_states_183")]; + tensor hidden_states_185_cast_fp16 = add(x = x_345_cast_fp16, y = hidden_states_183)[name = string("hidden_states_185_cast_fp16")]; + tensor per_layer_slice_37_begin_0 = const()[name = string("per_layer_slice_37_begin_0"), val = tensor([0, 0, 7680])]; + tensor per_layer_slice_37_end_0 = const()[name = string("per_layer_slice_37_end_0"), val = tensor([1, 1, 7936])]; + tensor per_layer_slice_37_end_mask_0 = const()[name = string("per_layer_slice_37_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_37_cast_fp16 = slice_by_index(begin = per_layer_slice_37_begin_0, end = per_layer_slice_37_end_0, end_mask = per_layer_slice_37_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_37_cast_fp16")]; + tensor var_10363 = const()[name = string("op_10363"), val = tensor([0, 2, 1])]; + tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; + tensor var_10364 = transpose(perm = var_10363, x = hidden_states_185_cast_fp16)[name = string("transpose_22")]; + tensor input_515 = expand_dims(axes = input_515_axes_0, x = var_10364)[name = string("input_515")]; + string 
gated_109_pad_type_0 = const()[name = string("gated_109_pad_type_0"), val = string("valid")]; + tensor gated_109_strides_0 = const()[name = string("gated_109_strides_0"), val = tensor([1, 1])]; + tensor gated_109_pad_0 = const()[name = string("gated_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_109_dilations_0 = const()[name = string("gated_109_dilations_0"), val = tensor([1, 1])]; + int32 gated_109_groups_0 = const()[name = string("gated_109_groups_0"), val = int32(1)]; + tensor gated_109 = conv(dilations = gated_109_dilations_0, groups = gated_109_groups_0, pad = gated_109_pad_0, pad_type = gated_109_pad_type_0, strides = gated_109_strides_0, weight = layers_c3_6_per_layer_input_gate_weight_palettized, x = input_515)[name = string("gated_109")]; + string gated_111_mode_0 = const()[name = string("gated_111_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_111 = gelu(mode = gated_111_mode_0, x = gated_109)[name = string("gated_111")]; + tensor var_10383 = const()[name = string("op_10383"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_37_axes_0 = const()[name = string("per_layer_slice_conv_37_axes_0"), val = tensor([2])]; + tensor var_10384_cast_fp16 = transpose(perm = var_10383, x = per_layer_slice_37_cast_fp16)[name = string("transpose_21")]; + tensor per_layer_slice_conv_37_cast_fp16 = expand_dims(axes = per_layer_slice_conv_37_axes_0, x = var_10384_cast_fp16)[name = string("per_layer_slice_conv_37_cast_fp16")]; + tensor input_517_cast_fp16 = mul(x = gated_111, y = per_layer_slice_conv_37_cast_fp16)[name = string("input_517_cast_fp16")]; + string gated_113_pad_type_0 = const()[name = string("gated_113_pad_type_0"), val = string("valid")]; + tensor gated_113_strides_0 = const()[name = string("gated_113_strides_0"), val = tensor([1, 1])]; + tensor gated_113_pad_0 = const()[name = string("gated_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_113_dilations_0 = const()[name = string("gated_113_dilations_0"), val = tensor([1, 
1])]; + int32 gated_113_groups_0 = const()[name = string("gated_113_groups_0"), val = int32(1)]; + tensor layers_c3_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978649984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978977728))))[name = string("layers_c3_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_113_cast_fp16 = conv(dilations = gated_113_dilations_0, groups = gated_113_groups_0, pad = gated_113_pad_0, pad_type = gated_113_pad_type_0, strides = gated_113_strides_0, weight = layers_c3_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("gated_113_cast_fp16")]; + tensor var_10400_axes_0 = const()[name = string("op_10400_axes_0"), val = tensor([2])]; + tensor var_10400_cast_fp16 = squeeze(axes = var_10400_axes_0, x = gated_113_cast_fp16)[name = string("op_10400_cast_fp16")]; + tensor var_10404 = const()[name = string("op_10404"), val = tensor([0, 2, 1])]; + int32 var_10410 = const()[name = string("op_10410"), val = int32(-1)]; + fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_349_cast_fp16 = transpose(perm = var_10404, x = var_10400_cast_fp16)[name = string("transpose_20")]; + tensor var_10412_cast_fp16 = mul(x = x_349_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_10412_cast_fp16")]; + bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; + tensor input_519_cast_fp16 = concat(axis = var_10410, interleave = input_519_interleave_0, values = (x_349_cast_fp16, var_10412_cast_fp16))[name = string("input_519_cast_fp16")]; + tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; + fp16 var_10407_to_fp16 = const()[name = string("op_10407_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_10407_to_fp16, x = input_519_cast_fp16)[name = string("normed_501_cast_fp16")]; + tensor var_10417_split_sizes_0 = const()[name = string("op_10417_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10417_axis_0 = const()[name = string("op_10417_axis_0"), val = int32(-1)]; + tensor var_10417_cast_fp16_0, tensor var_10417_cast_fp16_1 = split(axis = var_10417_axis_0, split_sizes = var_10417_split_sizes_0, x = normed_501_cast_fp16)[name = string("op_10417_cast_fp16")]; + tensor layers_c3_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978980352)))]; + tensor hidden_states_189_cast_fp16 = mul(x = var_10417_cast_fp16_0, y = layers_c3_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_189_cast_fp16")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_185_cast_fp16, y = hidden_states_189_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + tensor const_195_promoted_to_fp16 = const()[name = string("const_195_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_351_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_195_promoted_to_fp16)[name = string("x_351_cast_fp16")]; + int32 var_10432 = const()[name = string("op_10432"), val = int32(-1)]; + fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10434_cast_fp16 = mul(x = x_351_cast_fp16, y = const_196_promoted_to_fp16)[name = string("op_10434_cast_fp16")]; + bool input_521_interleave_0 = const()[name = string("input_521_interleave_0"), val = bool(false)]; + tensor input_521_cast_fp16 = concat(axis = var_10432, interleave = input_521_interleave_0, values = (x_351_cast_fp16, var_10434_cast_fp16))[name = 
string("input_521_cast_fp16")]; + tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; + fp16 var_10429_to_fp16 = const()[name = string("op_10429_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_10429_to_fp16, x = input_521_cast_fp16)[name = string("normed_505_cast_fp16")]; + tensor var_10439_split_sizes_0 = const()[name = string("op_10439_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10439_axis_0 = const()[name = string("op_10439_axis_0"), val = int32(-1)]; + tensor var_10439_cast_fp16_0, tensor var_10439_cast_fp16_1 = split(axis = var_10439_axis_0, split_sizes = var_10439_split_sizes_0, x = normed_505_cast_fp16)[name = string("op_10439_cast_fp16")]; + tensor layers_c3_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978985536)))]; + tensor h_115_cast_fp16 = mul(x = var_10439_cast_fp16_0, y = layers_c3_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_115_cast_fp16")]; + tensor var_10445 = const()[name = string("op_10445"), val = tensor([0, 2, 1])]; + tensor var_10448_axes_0 = const()[name = string("op_10448_axes_0"), val = tensor([2])]; + tensor var_10446_cast_fp16 = transpose(perm = var_10445, x = h_115_cast_fp16)[name = string("transpose_19")]; + tensor var_10448_cast_fp16 = expand_dims(axes = var_10448_axes_0, x = var_10446_cast_fp16)[name = string("op_10448_cast_fp16")]; + string var_10464_pad_type_0 = const()[name = string("op_10464_pad_type_0"), val = string("valid")]; + tensor var_10464_strides_0 = const()[name = string("op_10464_strides_0"), val = tensor([1, 1])]; + tensor var_10464_pad_0 = const()[name = string("op_10464_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10464_dilations_0 = const()[name = string("op_10464_dilations_0"), val = tensor([1, 1])]; + int32 
var_10464_groups_0 = const()[name = string("op_10464_groups_0"), val = int32(1)]; + tensor var_10464 = conv(dilations = var_10464_dilations_0, groups = var_10464_groups_0, pad = var_10464_pad_0, pad_type = var_10464_pad_type_0, strides = var_10464_strides_0, weight = layers_c3_7_self_attn_q_proj_weight_palettized, x = var_10448_cast_fp16)[name = string("op_10464")]; + tensor var_10469 = const()[name = string("op_10469"), val = tensor([1, 8, 256, 1])]; + tensor var_10470 = reshape(shape = var_10469, x = var_10464)[name = string("op_10470")]; + tensor var_10475 = const()[name = string("op_10475"), val = tensor([0, 1, 3, 2])]; + tensor var_10485 = const()[name = string("op_10485"), val = tensor([1, 8, 256])]; + tensor var_10476 = transpose(perm = var_10475, x = var_10470)[name = string("transpose_18")]; + tensor x_353 = reshape(shape = var_10485, x = var_10476)[name = string("x_353")]; + int32 var_10491 = const()[name = string("op_10491"), val = int32(-1)]; + fp16 const_197_promoted = const()[name = string("const_197_promoted"), val = fp16(-0x1p+0)]; + tensor var_10493 = mul(x = x_353, y = const_197_promoted)[name = string("op_10493")]; + bool input_525_interleave_0 = const()[name = string("input_525_interleave_0"), val = bool(false)]; + tensor input_525 = concat(axis = var_10491, interleave = input_525_interleave_0, values = (x_353, var_10493))[name = string("input_525")]; + tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; + fp16 var_10488_to_fp16 = const()[name = string("op_10488_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_10488_to_fp16, x = input_525)[name = string("normed_509_cast_fp16")]; + tensor var_10498_split_sizes_0 = const()[name = string("op_10498_split_sizes_0"), val = tensor([256, 256])]; + int32 var_10498_axis_0 = const()[name = string("op_10498_axis_0"), val = int32(-1)]; + tensor var_10498_0, tensor var_10498_1 = split(axis = 
var_10498_axis_0, split_sizes = var_10498_split_sizes_0, x = normed_509_cast_fp16)[name = string("op_10498")]; + tensor var_10500 = mul(x = var_10498_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_10500")]; + tensor var_10505 = const()[name = string("op_10505"), val = tensor([1, 8, 1, 256])]; + tensor q_141 = reshape(shape = var_10505, x = var_10500)[name = string("q_141")]; + tensor var_10507_cast_fp16 = mul(x = q_141, y = cos_s)[name = string("op_10507_cast_fp16")]; + tensor var_10508_split_sizes_0 = const()[name = string("op_10508_split_sizes_0"), val = tensor([128, 128])]; + int32 var_10508_axis_0 = const()[name = string("op_10508_axis_0"), val = int32(-1)]; + tensor var_10508_0, tensor var_10508_1 = split(axis = var_10508_axis_0, split_sizes = var_10508_split_sizes_0, x = q_141)[name = string("op_10508")]; + fp16 const_198_promoted = const()[name = string("const_198_promoted"), val = fp16(-0x1p+0)]; + tensor var_10510 = mul(x = var_10508_1, y = const_198_promoted)[name = string("op_10510")]; + int32 var_10512 = const()[name = string("op_10512"), val = int32(-1)]; + bool var_10513_interleave_0 = const()[name = string("op_10513_interleave_0"), val = bool(false)]; + tensor var_10513 = concat(axis = var_10512, interleave = var_10513_interleave_0, values = (var_10510, var_10508_0))[name = string("op_10513")]; + tensor var_10514_cast_fp16 = mul(x = var_10513, y = sin_s)[name = string("op_10514_cast_fp16")]; + tensor q_143_cast_fp16 = add(x = var_10507_cast_fp16, y = var_10514_cast_fp16)[name = string("q_143_cast_fp16")]; + bool attn_weights_77_transpose_x_0 = const()[name = string("attn_weights_77_transpose_x_0"), val = bool(false)]; + bool attn_weights_77_transpose_y_0 = const()[name = string("attn_weights_77_transpose_y_0"), val = bool(false)]; + tensor attn_weights_77_cast_fp16 = matmul(transpose_x = attn_weights_77_transpose_x_0, transpose_y = attn_weights_77_transpose_y_0, x = q_143_cast_fp16, y = transpose_94_cast_fp16)[name = 
string("attn_weights_77_cast_fp16")]; + tensor x_355_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = causal_mask_sliding)[name = string("x_355_cast_fp16")]; + tensor reduce_max_19_axes_0 = const()[name = string("reduce_max_19_axes_0"), val = tensor([-1])]; + bool reduce_max_19_keep_dims_0 = const()[name = string("reduce_max_19_keep_dims_0"), val = bool(true)]; + tensor reduce_max_19 = reduce_max(axes = reduce_max_19_axes_0, keep_dims = reduce_max_19_keep_dims_0, x = x_355_cast_fp16)[name = string("reduce_max_19")]; + tensor var_10546 = sub(x = x_355_cast_fp16, y = reduce_max_19)[name = string("op_10546")]; + tensor var_10552 = exp(x = var_10546)[name = string("op_10552")]; + tensor var_10562_axes_0 = const()[name = string("op_10562_axes_0"), val = tensor([-1])]; + bool var_10562_keep_dims_0 = const()[name = string("op_10562_keep_dims_0"), val = bool(true)]; + tensor var_10562 = reduce_sum(axes = var_10562_axes_0, keep_dims = var_10562_keep_dims_0, x = var_10552)[name = string("op_10562")]; + tensor var_10568_cast_fp16 = real_div(x = var_10552, y = var_10562)[name = string("op_10568_cast_fp16")]; + bool attn_output_115_transpose_x_0 = const()[name = string("attn_output_115_transpose_x_0"), val = bool(false)]; + bool attn_output_115_transpose_y_0 = const()[name = string("attn_output_115_transpose_y_0"), val = bool(false)]; + tensor attn_output_115_cast_fp16 = matmul(transpose_x = attn_output_115_transpose_x_0, transpose_y = attn_output_115_transpose_y_0, x = var_10568_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_10579 = const()[name = string("op_10579"), val = tensor([0, 2, 1, 3])]; + tensor var_10586 = const()[name = string("op_10586"), val = tensor([1, 1, -1])]; + tensor var_10580_cast_fp16 = transpose(perm = var_10579, x = attn_output_115_cast_fp16)[name = string("transpose_17")]; + tensor attn_output_117_cast_fp16 = reshape(shape = var_10586, x = var_10580_cast_fp16)[name = 
string("attn_output_117_cast_fp16")]; + tensor var_10591 = const()[name = string("op_10591"), val = tensor([0, 2, 1])]; + string var_10607_pad_type_0 = const()[name = string("op_10607_pad_type_0"), val = string("valid")]; + int32 var_10607_groups_0 = const()[name = string("op_10607_groups_0"), val = int32(1)]; + tensor var_10607_strides_0 = const()[name = string("op_10607_strides_0"), val = tensor([1])]; + tensor var_10607_pad_0 = const()[name = string("op_10607_pad_0"), val = tensor([0, 0])]; + tensor var_10607_dilations_0 = const()[name = string("op_10607_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978990720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981612224))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10592_cast_fp16 = transpose(perm = var_10591, x = attn_output_117_cast_fp16)[name = string("transpose_16")]; + tensor var_10607_cast_fp16 = conv(dilations = var_10607_dilations_0, groups = var_10607_groups_0, pad = var_10607_pad_0, pad_type = var_10607_pad_type_0, strides = var_10607_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_10592_cast_fp16)[name = string("op_10607_cast_fp16")]; + tensor var_10611 = const()[name = string("op_10611"), val = tensor([0, 2, 1])]; + int32 var_10617 = const()[name = string("op_10617"), val = int32(-1)]; + fp16 const_199_promoted_to_fp16 = const()[name = string("const_199_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_359_cast_fp16 = transpose(perm = var_10611, x = var_10607_cast_fp16)[name = string("transpose_15")]; + tensor var_10619_cast_fp16 = mul(x = x_359_cast_fp16, y = const_199_promoted_to_fp16)[name = string("op_10619_cast_fp16")]; + bool input_529_interleave_0 = const()[name = string("input_529_interleave_0"), val = bool(false)]; + tensor 
input_529_cast_fp16 = concat(axis = var_10617, interleave = input_529_interleave_0, values = (x_359_cast_fp16, var_10619_cast_fp16))[name = string("input_529_cast_fp16")]; + tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; + fp16 var_10614_to_fp16 = const()[name = string("op_10614_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_10614_to_fp16, x = input_529_cast_fp16)[name = string("normed_513_cast_fp16")]; + tensor var_10624_split_sizes_0 = const()[name = string("op_10624_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10624_axis_0 = const()[name = string("op_10624_axis_0"), val = int32(-1)]; + tensor var_10624_cast_fp16_0, tensor var_10624_cast_fp16_1 = split(axis = var_10624_axis_0, split_sizes = var_10624_split_sizes_0, x = normed_513_cast_fp16)[name = string("op_10624_cast_fp16")]; + tensor layers_c3_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981614848)))]; + tensor attn_output_119_cast_fp16 = mul(x = var_10624_cast_fp16_0, y = layers_c3_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_119_cast_fp16")]; + tensor x_361_cast_fp16 = add(x = x_351_cast_fp16, y = attn_output_119_cast_fp16)[name = string("x_361_cast_fp16")]; + int32 var_10633 = const()[name = string("op_10633"), val = int32(-1)]; + fp16 const_200_promoted_to_fp16 = const()[name = string("const_200_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10635_cast_fp16 = mul(x = x_361_cast_fp16, y = const_200_promoted_to_fp16)[name = string("op_10635_cast_fp16")]; + bool input_531_interleave_0 = const()[name = string("input_531_interleave_0"), val = bool(false)]; + tensor input_531_cast_fp16 = concat(axis = var_10633, interleave = input_531_interleave_0, values = 
(x_361_cast_fp16, var_10635_cast_fp16))[name = string("input_531_cast_fp16")]; + tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; + fp16 var_10630_to_fp16 = const()[name = string("op_10630_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_10630_to_fp16, x = input_531_cast_fp16)[name = string("normed_517_cast_fp16")]; + tensor var_10640_split_sizes_0 = const()[name = string("op_10640_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10640_axis_0 = const()[name = string("op_10640_axis_0"), val = int32(-1)]; + tensor var_10640_cast_fp16_0, tensor var_10640_cast_fp16_1 = split(axis = var_10640_axis_0, split_sizes = var_10640_split_sizes_0, x = normed_517_cast_fp16)[name = string("op_10640_cast_fp16")]; + tensor layers_c3_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981620032)))]; + tensor h_117_cast_fp16 = mul(x = var_10640_cast_fp16_0, y = layers_c3_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_117_cast_fp16")]; + tensor var_10651 = const()[name = string("op_10651"), val = tensor([0, 2, 1])]; + tensor input_533_axes_0 = const()[name = string("input_533_axes_0"), val = tensor([2])]; + tensor var_10652 = transpose(perm = var_10651, x = h_117_cast_fp16)[name = string("transpose_14")]; + tensor input_533 = expand_dims(axes = input_533_axes_0, x = var_10652)[name = string("input_533")]; + string gate_77_pad_type_0 = const()[name = string("gate_77_pad_type_0"), val = string("valid")]; + tensor gate_77_strides_0 = const()[name = string("gate_77_strides_0"), val = tensor([1, 1])]; + tensor gate_77_pad_0 = const()[name = string("gate_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_77_dilations_0 = const()[name = string("gate_77_dilations_0"), val = 
tensor([1, 1])]; + int32 gate_77_groups_0 = const()[name = string("gate_77_groups_0"), val = int32(1)]; + tensor gate_77 = conv(dilations = gate_77_dilations_0, groups = gate_77_groups_0, pad = gate_77_pad_0, pad_type = gate_77_pad_type_0, strides = gate_77_strides_0, weight = layers_c3_7_mlp_gate_proj_weight_palettized, x = input_533)[name = string("gate_77")]; + string up_39_pad_type_0 = const()[name = string("up_39_pad_type_0"), val = string("valid")]; + tensor up_39_strides_0 = const()[name = string("up_39_strides_0"), val = tensor([1, 1])]; + tensor up_39_pad_0 = const()[name = string("up_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_39_dilations_0 = const()[name = string("up_39_dilations_0"), val = tensor([1, 1])]; + int32 up_39_groups_0 = const()[name = string("up_39_groups_0"), val = int32(1)]; + tensor up_39 = conv(dilations = up_39_dilations_0, groups = up_39_groups_0, pad = up_39_pad_0, pad_type = up_39_pad_type_0, strides = up_39_strides_0, weight = layers_c3_7_mlp_up_proj_weight_palettized, x = input_533)[name = string("up_39")]; + string gate_79_mode_0 = const()[name = string("gate_79_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_79 = gelu(mode = gate_79_mode_0, x = gate_77)[name = string("gate_79")]; + tensor input_535 = mul(x = gate_79, y = up_39)[name = string("input_535")]; + string mlp_out_39_pad_type_0 = const()[name = string("mlp_out_39_pad_type_0"), val = string("valid")]; + tensor mlp_out_39_strides_0 = const()[name = string("mlp_out_39_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_39_pad_0 = const()[name = string("mlp_out_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_39_dilations_0 = const()[name = string("mlp_out_39_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_39_groups_0 = const()[name = string("mlp_out_39_groups_0"), val = int32(1)]; + tensor mlp_out_39 = conv(dilations = mlp_out_39_dilations_0, groups = mlp_out_39_groups_0, pad = mlp_out_39_pad_0, pad_type = mlp_out_39_pad_type_0, strides 
= mlp_out_39_strides_0, weight = layers_c3_7_mlp_down_proj_weight_palettized, x = input_535)[name = string("mlp_out_39")]; + tensor var_10692_axes_0 = const()[name = string("op_10692_axes_0"), val = tensor([2])]; + tensor var_10692 = squeeze(axes = var_10692_axes_0, x = mlp_out_39)[name = string("op_10692")]; + tensor var_10696 = const()[name = string("op_10696"), val = tensor([0, 2, 1])]; + int32 var_10702 = const()[name = string("op_10702"), val = int32(-1)]; + fp16 const_201_promoted = const()[name = string("const_201_promoted"), val = fp16(-0x1p+0)]; + tensor x_363 = transpose(perm = var_10696, x = var_10692)[name = string("transpose_13")]; + tensor var_10704 = mul(x = x_363, y = const_201_promoted)[name = string("op_10704")]; + bool input_537_interleave_0 = const()[name = string("input_537_interleave_0"), val = bool(false)]; + tensor input_537 = concat(axis = var_10702, interleave = input_537_interleave_0, values = (x_363, var_10704))[name = string("input_537")]; + tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; + fp16 var_10699_to_fp16 = const()[name = string("op_10699_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_10699_to_fp16, x = input_537)[name = string("normed_521_cast_fp16")]; + tensor var_10709_split_sizes_0 = const()[name = string("op_10709_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10709_axis_0 = const()[name = string("op_10709_axis_0"), val = int32(-1)]; + tensor var_10709_0, tensor var_10709_1 = split(axis = var_10709_axis_0, split_sizes = var_10709_split_sizes_0, x = normed_521_cast_fp16)[name = string("op_10709")]; + tensor hidden_states_193 = mul(x = var_10709_0, y = layers_c3_7_post_feedforward_layernorm_weight)[name = string("hidden_states_193")]; + tensor hidden_states_195_cast_fp16 = add(x = x_361_cast_fp16, y = hidden_states_193)[name = string("hidden_states_195_cast_fp16")]; + tensor per_layer_slice_39_begin_0 
= const()[name = string("per_layer_slice_39_begin_0"), val = tensor([0, 0, 7936])]; + tensor per_layer_slice_39_end_0 = const()[name = string("per_layer_slice_39_end_0"), val = tensor([1, 1, 8192])]; + tensor per_layer_slice_39_end_mask_0 = const()[name = string("per_layer_slice_39_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_39_cast_fp16 = slice_by_index(begin = per_layer_slice_39_begin_0, end = per_layer_slice_39_end_0, end_mask = per_layer_slice_39_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_39_cast_fp16")]; + tensor var_10737 = const()[name = string("op_10737"), val = tensor([0, 2, 1])]; + tensor input_539_axes_0 = const()[name = string("input_539_axes_0"), val = tensor([2])]; + tensor var_10738 = transpose(perm = var_10737, x = hidden_states_195_cast_fp16)[name = string("transpose_12")]; + tensor input_539 = expand_dims(axes = input_539_axes_0, x = var_10738)[name = string("input_539")]; + string gated_115_pad_type_0 = const()[name = string("gated_115_pad_type_0"), val = string("valid")]; + tensor gated_115_strides_0 = const()[name = string("gated_115_strides_0"), val = tensor([1, 1])]; + tensor gated_115_pad_0 = const()[name = string("gated_115_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_115_dilations_0 = const()[name = string("gated_115_dilations_0"), val = tensor([1, 1])]; + int32 gated_115_groups_0 = const()[name = string("gated_115_groups_0"), val = int32(1)]; + tensor gated_115 = conv(dilations = gated_115_dilations_0, groups = gated_115_groups_0, pad = gated_115_pad_0, pad_type = gated_115_pad_type_0, strides = gated_115_strides_0, weight = layers_c3_7_per_layer_input_gate_weight_palettized, x = input_539)[name = string("gated_115")]; + string gated_117_mode_0 = const()[name = string("gated_117_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_117 = gelu(mode = gated_117_mode_0, x = gated_115)[name = string("gated_117")]; + tensor var_10757 = const()[name = string("op_10757"), val 
= tensor([0, 2, 1])]; + tensor per_layer_slice_conv_39_axes_0 = const()[name = string("per_layer_slice_conv_39_axes_0"), val = tensor([2])]; + tensor var_10758_cast_fp16 = transpose(perm = var_10757, x = per_layer_slice_39_cast_fp16)[name = string("transpose_11")]; + tensor per_layer_slice_conv_39_cast_fp16 = expand_dims(axes = per_layer_slice_conv_39_axes_0, x = var_10758_cast_fp16)[name = string("per_layer_slice_conv_39_cast_fp16")]; + tensor input_541_cast_fp16 = mul(x = gated_117, y = per_layer_slice_conv_39_cast_fp16)[name = string("input_541_cast_fp16")]; + string gated_119_pad_type_0 = const()[name = string("gated_119_pad_type_0"), val = string("valid")]; + tensor gated_119_strides_0 = const()[name = string("gated_119_strides_0"), val = tensor([1, 1])]; + tensor gated_119_pad_0 = const()[name = string("gated_119_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_119_dilations_0 = const()[name = string("gated_119_dilations_0"), val = tensor([1, 1])]; + int32 gated_119_groups_0 = const()[name = string("gated_119_groups_0"), val = int32(1)]; + tensor layers_c3_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981625216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981952960))))[name = string("layers_c3_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_119_cast_fp16 = conv(dilations = gated_119_dilations_0, groups = gated_119_groups_0, pad = gated_119_pad_0, pad_type = gated_119_pad_type_0, strides = gated_119_strides_0, weight = layers_c3_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_541_cast_fp16)[name = string("gated_119_cast_fp16")]; + tensor var_10774_axes_0 = const()[name = string("op_10774_axes_0"), val = tensor([2])]; + tensor var_10774_cast_fp16 = squeeze(axes = var_10774_axes_0, x = gated_119_cast_fp16)[name = 
string("op_10774_cast_fp16")]; + tensor var_10778 = const()[name = string("op_10778"), val = tensor([0, 2, 1])]; + int32 var_10784 = const()[name = string("op_10784"), val = int32(-1)]; + fp16 const_202_promoted_to_fp16 = const()[name = string("const_202_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_365_cast_fp16 = transpose(perm = var_10778, x = var_10774_cast_fp16)[name = string("transpose_10")]; + tensor var_10786_cast_fp16 = mul(x = x_365_cast_fp16, y = const_202_promoted_to_fp16)[name = string("op_10786_cast_fp16")]; + bool input_543_interleave_0 = const()[name = string("input_543_interleave_0"), val = bool(false)]; + tensor input_543_cast_fp16 = concat(axis = var_10784, interleave = input_543_interleave_0, values = (x_365_cast_fp16, var_10786_cast_fp16))[name = string("input_543_cast_fp16")]; + tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; + fp16 var_10781_to_fp16 = const()[name = string("op_10781_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_10781_to_fp16, x = input_543_cast_fp16)[name = string("normed_525_cast_fp16")]; + tensor var_10791_split_sizes_0 = const()[name = string("op_10791_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10791_axis_0 = const()[name = string("op_10791_axis_0"), val = int32(-1)]; + tensor var_10791_cast_fp16_0, tensor var_10791_cast_fp16_1 = split(axis = var_10791_axis_0, split_sizes = var_10791_split_sizes_0, x = normed_525_cast_fp16)[name = string("op_10791_cast_fp16")]; + tensor layers_c3_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981955584)))]; + tensor hidden_states_199_cast_fp16 = mul(x = var_10791_cast_fp16_0, y = layers_c3_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = 
string("hidden_states_199_cast_fp16")]; + tensor hidden_states_201_cast_fp16 = add(x = hidden_states_195_cast_fp16, y = hidden_states_199_cast_fp16)[name = string("hidden_states_201_cast_fp16")]; + tensor const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = tensor([0x1.a2p-1])]; + tensor x_367_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_203_promoted_to_fp16)[name = string("x_367_cast_fp16")]; + int32 var_10806 = const()[name = string("op_10806"), val = int32(-1)]; + fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10808_cast_fp16 = mul(x = x_367_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_10808_cast_fp16")]; + bool input_545_interleave_0 = const()[name = string("input_545_interleave_0"), val = bool(false)]; + tensor input_545_cast_fp16 = concat(axis = var_10806, interleave = input_545_interleave_0, values = (x_367_cast_fp16, var_10808_cast_fp16))[name = string("input_545_cast_fp16")]; + tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; + fp16 var_10803_to_fp16 = const()[name = string("op_10803_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_10803_to_fp16, x = input_545_cast_fp16)[name = string("normed_529_cast_fp16")]; + tensor var_10813_split_sizes_0 = const()[name = string("op_10813_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10813_axis_0 = const()[name = string("op_10813_axis_0"), val = int32(-1)]; + tensor var_10813_cast_fp16_0, tensor var_10813_cast_fp16_1 = split(axis = var_10813_axis_0, split_sizes = var_10813_split_sizes_0, x = normed_529_cast_fp16)[name = string("op_10813_cast_fp16")]; + tensor layers_c3_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(981960768)))]; + tensor h_121_cast_fp16 = mul(x = var_10813_cast_fp16_0, y = layers_c3_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_121_cast_fp16")]; + tensor var_10819 = const()[name = string("op_10819"), val = tensor([0, 2, 1])]; + tensor var_10822_axes_0 = const()[name = string("op_10822_axes_0"), val = tensor([2])]; + tensor var_10820_cast_fp16 = transpose(perm = var_10819, x = h_121_cast_fp16)[name = string("transpose_9")]; + tensor var_10822_cast_fp16 = expand_dims(axes = var_10822_axes_0, x = var_10820_cast_fp16)[name = string("op_10822_cast_fp16")]; + string var_10838_pad_type_0 = const()[name = string("op_10838_pad_type_0"), val = string("valid")]; + tensor var_10838_strides_0 = const()[name = string("op_10838_strides_0"), val = tensor([1, 1])]; + tensor var_10838_pad_0 = const()[name = string("op_10838_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10838_dilations_0 = const()[name = string("op_10838_dilations_0"), val = tensor([1, 1])]; + int32 var_10838_groups_0 = const()[name = string("op_10838_groups_0"), val = int32(1)]; + tensor var_10838 = conv(dilations = var_10838_dilations_0, groups = var_10838_groups_0, pad = var_10838_pad_0, pad_type = var_10838_pad_type_0, strides = var_10838_strides_0, weight = layers_c3_8_self_attn_q_proj_weight_palettized, x = var_10822_cast_fp16)[name = string("op_10838")]; + tensor var_10843 = const()[name = string("op_10843"), val = tensor([1, 8, 256, 1])]; + tensor var_10844 = reshape(shape = var_10843, x = var_10838)[name = string("op_10844")]; + tensor var_10849 = const()[name = string("op_10849"), val = tensor([0, 1, 3, 2])]; + tensor var_10859 = const()[name = string("op_10859"), val = tensor([1, 8, 256])]; + tensor var_10850 = transpose(perm = var_10849, x = var_10844)[name = string("transpose_8")]; + tensor x_369 = reshape(shape = var_10859, x = var_10850)[name = string("x_369")]; + int32 var_10865 = const()[name = string("op_10865"), val = int32(-1)]; + fp16 const_205_promoted = 
const()[name = string("const_205_promoted"), val = fp16(-0x1p+0)]; + tensor var_10867 = mul(x = x_369, y = const_205_promoted)[name = string("op_10867")]; + bool input_549_interleave_0 = const()[name = string("input_549_interleave_0"), val = bool(false)]; + tensor input_549 = concat(axis = var_10865, interleave = input_549_interleave_0, values = (x_369, var_10867))[name = string("input_549")]; + tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; + fp16 var_10862_to_fp16 = const()[name = string("op_10862_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_10862_to_fp16, x = input_549)[name = string("normed_533_cast_fp16")]; + tensor var_10872_split_sizes_0 = const()[name = string("op_10872_split_sizes_0"), val = tensor([256, 256])]; + int32 var_10872_axis_0 = const()[name = string("op_10872_axis_0"), val = int32(-1)]; + tensor var_10872_0, tensor var_10872_1 = split(axis = var_10872_axis_0, split_sizes = var_10872_split_sizes_0, x = normed_533_cast_fp16)[name = string("op_10872")]; + tensor var_10874 = mul(x = var_10872_0, y = layers_c2_10_self_attn_q_norm_weight)[name = string("op_10874")]; + tensor var_10879 = const()[name = string("op_10879"), val = tensor([1, 8, 1, 256])]; + tensor q_147 = reshape(shape = var_10879, x = var_10874)[name = string("q_147")]; + tensor var_10881_cast_fp16 = mul(x = q_147, y = cos_s)[name = string("op_10881_cast_fp16")]; + tensor var_10882_split_sizes_0 = const()[name = string("op_10882_split_sizes_0"), val = tensor([128, 128])]; + int32 var_10882_axis_0 = const()[name = string("op_10882_axis_0"), val = int32(-1)]; + tensor var_10882_0, tensor var_10882_1 = split(axis = var_10882_axis_0, split_sizes = var_10882_split_sizes_0, x = q_147)[name = string("op_10882")]; + fp16 const_206_promoted = const()[name = string("const_206_promoted"), val = fp16(-0x1p+0)]; + tensor var_10884 = mul(x = var_10882_1, y = const_206_promoted)[name = 
string("op_10884")]; + int32 var_10886 = const()[name = string("op_10886"), val = int32(-1)]; + bool var_10887_interleave_0 = const()[name = string("op_10887_interleave_0"), val = bool(false)]; + tensor var_10887 = concat(axis = var_10886, interleave = var_10887_interleave_0, values = (var_10884, var_10882_0))[name = string("op_10887")]; + tensor var_10888_cast_fp16 = mul(x = var_10887, y = sin_s)[name = string("op_10888_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_10881_cast_fp16, y = var_10888_cast_fp16)[name = string("q_cast_fp16")]; + bool attn_weights_81_transpose_x_0 = const()[name = string("attn_weights_81_transpose_x_0"), val = bool(false)]; + bool attn_weights_81_transpose_y_0 = const()[name = string("attn_weights_81_transpose_y_0"), val = bool(false)]; + tensor attn_weights_81_cast_fp16 = matmul(transpose_x = attn_weights_81_transpose_x_0, transpose_y = attn_weights_81_transpose_y_0, x = q_cast_fp16, y = transpose_94_cast_fp16)[name = string("attn_weights_81_cast_fp16")]; + tensor x_371_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = causal_mask_sliding)[name = string("x_371_cast_fp16")]; + tensor reduce_max_20_axes_0 = const()[name = string("reduce_max_20_axes_0"), val = tensor([-1])]; + bool reduce_max_20_keep_dims_0 = const()[name = string("reduce_max_20_keep_dims_0"), val = bool(true)]; + tensor reduce_max_20 = reduce_max(axes = reduce_max_20_axes_0, keep_dims = reduce_max_20_keep_dims_0, x = x_371_cast_fp16)[name = string("reduce_max_20")]; + tensor var_10920 = sub(x = x_371_cast_fp16, y = reduce_max_20)[name = string("op_10920")]; + tensor var_10926 = exp(x = var_10920)[name = string("op_10926")]; + tensor var_10936_axes_0 = const()[name = string("op_10936_axes_0"), val = tensor([-1])]; + bool var_10936_keep_dims_0 = const()[name = string("op_10936_keep_dims_0"), val = bool(true)]; + tensor var_10936 = reduce_sum(axes = var_10936_axes_0, keep_dims = var_10936_keep_dims_0, x = var_10926)[name = string("op_10936")]; + tensor var_10942_cast_fp16 
= real_div(x = var_10926, y = var_10936)[name = string("op_10942_cast_fp16")]; + bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; + bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; + tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = var_10942_cast_fp16, y = V_expanded_21_cast_fp16)[name = string("attn_output_121_cast_fp16")]; + tensor var_10953 = const()[name = string("op_10953"), val = tensor([0, 2, 1, 3])]; + tensor var_10960 = const()[name = string("op_10960"), val = tensor([1, 1, -1])]; + tensor var_10954_cast_fp16 = transpose(perm = var_10953, x = attn_output_121_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_123_cast_fp16 = reshape(shape = var_10960, x = var_10954_cast_fp16)[name = string("attn_output_123_cast_fp16")]; + tensor var_10965 = const()[name = string("op_10965"), val = tensor([0, 2, 1])]; + string var_10981_pad_type_0 = const()[name = string("op_10981_pad_type_0"), val = string("valid")]; + int32 var_10981_groups_0 = const()[name = string("op_10981_groups_0"), val = int32(1)]; + tensor var_10981_strides_0 = const()[name = string("op_10981_strides_0"), val = tensor([1])]; + tensor var_10981_pad_0 = const()[name = string("op_10981_pad_0"), val = tensor([0, 0])]; + tensor var_10981_dilations_0 = const()[name = string("op_10981_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981965952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984587456))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10966_cast_fp16 = transpose(perm = var_10965, x = attn_output_123_cast_fp16)[name = 
string("transpose_6")]; + tensor var_10981_cast_fp16 = conv(dilations = var_10981_dilations_0, groups = var_10981_groups_0, pad = var_10981_pad_0, pad_type = var_10981_pad_type_0, strides = var_10981_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_10966_cast_fp16)[name = string("op_10981_cast_fp16")]; + tensor var_10985 = const()[name = string("op_10985"), val = tensor([0, 2, 1])]; + int32 var_10991 = const()[name = string("op_10991"), val = int32(-1)]; + fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_375_cast_fp16 = transpose(perm = var_10985, x = var_10981_cast_fp16)[name = string("transpose_5")]; + tensor var_10993_cast_fp16 = mul(x = x_375_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_10993_cast_fp16")]; + bool input_553_interleave_0 = const()[name = string("input_553_interleave_0"), val = bool(false)]; + tensor input_553_cast_fp16 = concat(axis = var_10991, interleave = input_553_interleave_0, values = (x_375_cast_fp16, var_10993_cast_fp16))[name = string("input_553_cast_fp16")]; + tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; + fp16 var_10988_to_fp16 = const()[name = string("op_10988_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_10988_to_fp16, x = input_553_cast_fp16)[name = string("normed_537_cast_fp16")]; + tensor var_10998_split_sizes_0 = const()[name = string("op_10998_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_10998_axis_0 = const()[name = string("op_10998_axis_0"), val = int32(-1)]; + tensor var_10998_cast_fp16_0, tensor var_10998_cast_fp16_1 = split(axis = var_10998_axis_0, split_sizes = var_10998_split_sizes_0, x = normed_537_cast_fp16)[name = string("op_10998_cast_fp16")]; + tensor layers_c3_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_c3_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984590080)))]; + tensor attn_output_cast_fp16 = mul(x = var_10998_cast_fp16_0, y = layers_c3_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_377_cast_fp16 = add(x = x_367_cast_fp16, y = attn_output_cast_fp16)[name = string("x_377_cast_fp16")]; + int32 var_11007 = const()[name = string("op_11007"), val = int32(-1)]; + fp16 const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11009_cast_fp16 = mul(x = x_377_cast_fp16, y = const_208_promoted_to_fp16)[name = string("op_11009_cast_fp16")]; + bool input_555_interleave_0 = const()[name = string("input_555_interleave_0"), val = bool(false)]; + tensor input_555_cast_fp16 = concat(axis = var_11007, interleave = input_555_interleave_0, values = (x_377_cast_fp16, var_11009_cast_fp16))[name = string("input_555_cast_fp16")]; + tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; + fp16 var_11004_to_fp16 = const()[name = string("op_11004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_11004_to_fp16, x = input_555_cast_fp16)[name = string("normed_541_cast_fp16")]; + tensor var_11014_split_sizes_0 = const()[name = string("op_11014_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_11014_axis_0 = const()[name = string("op_11014_axis_0"), val = int32(-1)]; + tensor var_11014_cast_fp16_0, tensor var_11014_cast_fp16_1 = split(axis = var_11014_axis_0, split_sizes = var_11014_split_sizes_0, x = normed_541_cast_fp16)[name = string("op_11014_cast_fp16")]; + tensor layers_c3_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_c3_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(984595264)))]; + tensor h_123_cast_fp16 = mul(x = var_11014_cast_fp16_0, y = layers_c3_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_123_cast_fp16")]; + tensor var_11025 = const()[name = string("op_11025"), val = tensor([0, 2, 1])]; + tensor input_557_axes_0 = const()[name = string("input_557_axes_0"), val = tensor([2])]; + tensor var_11026 = transpose(perm = var_11025, x = h_123_cast_fp16)[name = string("transpose_4")]; + tensor input_557 = expand_dims(axes = input_557_axes_0, x = var_11026)[name = string("input_557")]; + string gate_81_pad_type_0 = const()[name = string("gate_81_pad_type_0"), val = string("valid")]; + tensor gate_81_strides_0 = const()[name = string("gate_81_strides_0"), val = tensor([1, 1])]; + tensor gate_81_pad_0 = const()[name = string("gate_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_81_dilations_0 = const()[name = string("gate_81_dilations_0"), val = tensor([1, 1])]; + int32 gate_81_groups_0 = const()[name = string("gate_81_groups_0"), val = int32(1)]; + tensor gate_81 = conv(dilations = gate_81_dilations_0, groups = gate_81_groups_0, pad = gate_81_pad_0, pad_type = gate_81_pad_type_0, strides = gate_81_strides_0, weight = layers_c3_8_mlp_gate_proj_weight_palettized, x = input_557)[name = string("gate_81")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_c3_8_mlp_up_proj_weight_palettized, x = input_557)[name = string("up")]; + 
string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_81)[name = string("gate")]; + tensor input_559 = mul(x = gate, y = up)[name = string("input_559")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_c3_8_mlp_down_proj_weight_palettized, x = input_559)[name = string("mlp_out")]; + tensor var_11066_axes_0 = const()[name = string("op_11066_axes_0"), val = tensor([2])]; + tensor var_11066 = squeeze(axes = var_11066_axes_0, x = mlp_out)[name = string("op_11066")]; + tensor var_11070 = const()[name = string("op_11070"), val = tensor([0, 2, 1])]; + int32 var_11076 = const()[name = string("op_11076"), val = int32(-1)]; + fp16 const_209_promoted = const()[name = string("const_209_promoted"), val = fp16(-0x1p+0)]; + tensor x_379 = transpose(perm = var_11070, x = var_11066)[name = string("transpose_3")]; + tensor var_11078 = mul(x = x_379, y = const_209_promoted)[name = string("op_11078")]; + bool input_561_interleave_0 = const()[name = string("input_561_interleave_0"), val = bool(false)]; + tensor input_561 = concat(axis = var_11076, interleave = input_561_interleave_0, values = (x_379, var_11078))[name = string("input_561")]; + tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; + fp16 var_11073_to_fp16 = const()[name = 
string("op_11073_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_11073_to_fp16, x = input_561)[name = string("normed_545_cast_fp16")]; + tensor var_11083_split_sizes_0 = const()[name = string("op_11083_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_11083_axis_0 = const()[name = string("op_11083_axis_0"), val = int32(-1)]; + tensor var_11083_0, tensor var_11083_1 = split(axis = var_11083_axis_0, split_sizes = var_11083_split_sizes_0, x = normed_545_cast_fp16)[name = string("op_11083")]; + tensor hidden_states_203 = mul(x = var_11083_0, y = layers_c3_8_post_feedforward_layernorm_weight)[name = string("hidden_states_203")]; + tensor hidden_states_205_cast_fp16 = add(x = x_377_cast_fp16, y = hidden_states_203)[name = string("hidden_states_205_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 8192])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 8448])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_11111 = const()[name = string("op_11111"), val = tensor([0, 2, 1])]; + tensor input_563_axes_0 = const()[name = string("input_563_axes_0"), val = tensor([2])]; + tensor var_11112 = transpose(perm = var_11111, x = hidden_states_205_cast_fp16)[name = string("transpose_2")]; + tensor input_563 = expand_dims(axes = input_563_axes_0, x = var_11112)[name = string("input_563")]; + string gated_121_pad_type_0 = const()[name = string("gated_121_pad_type_0"), val = string("valid")]; + tensor gated_121_strides_0 = const()[name = string("gated_121_strides_0"), val = 
tensor([1, 1])]; + tensor gated_121_pad_0 = const()[name = string("gated_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_121_dilations_0 = const()[name = string("gated_121_dilations_0"), val = tensor([1, 1])]; + int32 gated_121_groups_0 = const()[name = string("gated_121_groups_0"), val = int32(1)]; + tensor gated_121 = conv(dilations = gated_121_dilations_0, groups = gated_121_groups_0, pad = gated_121_pad_0, pad_type = gated_121_pad_type_0, strides = gated_121_strides_0, weight = layers_c3_8_per_layer_input_gate_weight_palettized, x = input_563)[name = string("gated_121")]; + string gated_123_mode_0 = const()[name = string("gated_123_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_123 = gelu(mode = gated_123_mode_0, x = gated_121)[name = string("gated_123")]; + tensor var_11131 = const()[name = string("op_11131"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_11132_cast_fp16 = transpose(perm = var_11131, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_11132_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_565_cast_fp16 = mul(x = gated_123, y = per_layer_slice_conv_cast_fp16)[name = string("input_565_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_c3_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(984600448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984928192))))[name = string("layers_c3_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_c3_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_565_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_11148_axes_0 = const()[name = string("op_11148_axes_0"), val = tensor([2])]; + tensor var_11148_cast_fp16 = squeeze(axes = var_11148_axes_0, x = gated_cast_fp16)[name = string("op_11148_cast_fp16")]; + tensor var_11152 = const()[name = string("op_11152"), val = tensor([0, 2, 1])]; + int32 var_11158 = const()[name = string("op_11158"), val = int32(-1)]; + fp16 const_210_promoted_to_fp16 = const()[name = string("const_210_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_11152, x = var_11148_cast_fp16)[name = string("transpose_0")]; + tensor var_11160_cast_fp16 = mul(x = x_cast_fp16, y = const_210_promoted_to_fp16)[name = string("op_11160_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_11158, interleave = input_interleave_0, values = (x_cast_fp16, var_11160_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; + fp16 var_11155_to_fp16 = const()[name = string("op_11155_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_11155_to_fp16, x = input_cast_fp16)[name = string("normed_549_cast_fp16")]; + tensor var_11165_split_sizes_0 = const()[name = string("op_11165_split_sizes_0"), val = tensor([2560, 2560])]; + int32 
var_11165_axis_0 = const()[name = string("op_11165_axis_0"), val = int32(-1)]; + tensor var_11165_cast_fp16_0, tensor var_11165_cast_fp16_1 = split(axis = var_11165_axis_0, split_sizes = var_11165_split_sizes_0, x = normed_549_cast_fp16)[name = string("op_11165_cast_fp16")]; + tensor layers_c3_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_c3_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984930816)))]; + tensor hidden_states_209_cast_fp16 = mul(x = var_11165_cast_fp16_0, y = layers_c3_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_209_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_205_cast_fp16, y = hidden_states_209_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_211_promoted_to_fp16 = const()[name = string("const_211_promoted_to_fp16"), val = tensor([0x1.b4p-1])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_211_promoted_to_fp16)[name = string("op_11175_cast_fp16")]; + int32 var_11178_axis_0 = const()[name = string("op_11178_axis_0"), val = int32(0)]; + tensor K_sliding_out = stack(axis = var_11178_axis_0, values = (var_1722_cast_fp16, var_2281_cast_fp16, var_2840_cast_fp16, var_3399_cast_fp16, var_3958_cast_fp16, var_5034_cast_fp16, var_5593_cast_fp16, var_6152_cast_fp16, var_6711_cast_fp16, var_7280_cast_fp16))[name = string("op_11178_cast_fp16")]; + int32 var_11181_axis_0 = const()[name = string("op_11181_axis_0"), val = int32(0)]; + tensor V_sliding_out = stack(axis = var_11181_axis_0, values = (var_1724_cast_fp16, var_2283_cast_fp16, var_2842_cast_fp16, var_3401_cast_fp16, var_3960_cast_fp16, var_5036_cast_fp16, var_5595_cast_fp16, var_6154_cast_fp16, var_6713_cast_fp16, var_7282_cast_fp16))[name = string("op_11181_cast_fp16")]; + int32 var_11184_axis_0 = const()[name = string("op_11184_axis_0"), val = int32(0)]; + tensor 
K_full_out = stack(axis = var_11184_axis_0, values = (var_4475_cast_fp16, var_7807_cast_fp16))[name = string("op_11184_cast_fp16")]; + int32 var_11187_axis_0 = const()[name = string("op_11187_axis_0"), val = int32(0)]; + tensor V_full_out = stack(axis = var_11187_axis_0, values = (var_4477_cast_fp16, var_7809_cast_fp16))[name = string("op_11187_cast_fp16")]; + } -> (hidden_states_out, K_sliding_out, V_sliding_out, K_full_out, V_full_out, kv13_k, kv13_v, kv14_k, kv14_v); +} \ No newline at end of file diff --git a/chunk2_3way.mlmodelc/weights/weight.bin b/chunk2_3way.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdd76a14663cd78fc1a5e310f0480a852e059bb8 --- /dev/null +++ b/chunk2_3way.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dfcddee4de0905bab42c9dbf6b4d03ec1fc888d76de762dee5153423081b838 +size 984936000 diff --git a/chunk3.mlmodelc/analytics/coremldata.bin b/chunk3.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..743cc3d816cd971d742e94b590870f4b807ccf01 --- /dev/null +++ b/chunk3.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83e107e7d0f531fa20c6861ea0483120a4246adc98daba2bdc9ec015f77bc7ac +size 243 diff --git a/chunk3.mlmodelc/coremldata.bin b/chunk3.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0d51080450c48a965e71afe61876ffe1ced1cff --- /dev/null +++ b/chunk3.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d43e7d6694b27bec11ece3eb5bb8b3b5b185fa8bc0b668980e154952fd36bf0b +size 940 diff --git a/chunk3.mlmodelc/model.mil b/chunk3.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..3a509e87f3c5d31c8231e1c5ec49885b4e8744cf --- /dev/null +++ b/chunk3.mlmodelc/model.mil @@ -0,0 +1,3871 @@ +program(1.3) +[buildInfo = 
dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func decode_q1(tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor kv13_k, tensor kv13_v, tensor kv14_k, tensor kv14_v, tensor per_layer_combined, tensor sin_f, tensor sin_s, tensor update_mask) { + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15731520))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28849088))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28859392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41966656))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = 
string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41969280)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41974464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302208))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44924032))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44926144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58033408))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58043712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71150976))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84268544))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84271168)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84276352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604096))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87225920))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87228032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100335296))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100345600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113452864))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113463168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126570432))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(126573056)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126578240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126905984))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126906304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129527808))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129529920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142637184))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142647488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155754752))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155765056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168872320))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(168874944)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168880128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169207872))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169208192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171829696))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171831808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184939072))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184949376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198056640))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198066944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211174208))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211176832)))]; + tensor 
layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211182016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211509760))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211510080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216753024))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_self_attn_q_norm_weight = const()[name = string("layers_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216757184)))]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216758272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229865536))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229875840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242983104))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242993408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256100672))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = 
string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256103296)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256108480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436224))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259058048))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259060160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272167424))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272177728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285284992))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298402560))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), 
val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298405184)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738112))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301359936))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314469312))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314479616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327586880))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327597184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340704448))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(340707072)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340712256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040000))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343661824))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343663936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356771200))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356781504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369888768))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369899072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383006336))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(383008960)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383014144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383341888))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + int32 var_450 = const()[name = string("op_450"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_452_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_452_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_450, interleave = input_1_interleave_0, values = (hidden_states, var_452_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_447_to_fp16 = const()[name = string("op_447_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_447_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_457_split_sizes_0 = const()[name = string("op_457_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_457_axis_0 = const()[name = string("op_457_axis_0"), val = int32(-1)]; + tensor var_457_cast_fp16_0, tensor var_457_cast_fp16_1 = split(axis = var_457_axis_0, split_sizes = var_457_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_457_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383342208)))]; + tensor h_1_cast_fp16 = mul(x = var_457_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = 
string("h_1_cast_fp16")]; + tensor var_463 = const()[name = string("op_463"), val = tensor([0, 2, 1])]; + tensor var_466_axes_0 = const()[name = string("op_466_axes_0"), val = tensor([2])]; + tensor var_464_cast_fp16 = transpose(perm = var_463, x = h_1_cast_fp16)[name = string("transpose_101")]; + tensor var_466_cast_fp16 = expand_dims(axes = var_466_axes_0, x = var_464_cast_fp16)[name = string("op_466_cast_fp16")]; + string var_482_pad_type_0 = const()[name = string("op_482_pad_type_0"), val = string("valid")]; + tensor var_482_strides_0 = const()[name = string("op_482_strides_0"), val = tensor([1, 1])]; + tensor var_482_pad_0 = const()[name = string("op_482_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_482_dilations_0 = const()[name = string("op_482_dilations_0"), val = tensor([1, 1])]; + int32 var_482_groups_0 = const()[name = string("op_482_groups_0"), val = int32(1)]; + tensor var_482 = conv(dilations = var_482_dilations_0, groups = var_482_groups_0, pad = var_482_pad_0, pad_type = var_482_pad_type_0, strides = var_482_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_466_cast_fp16)[name = string("op_482")]; + tensor var_487 = const()[name = string("op_487"), val = tensor([1, 8, 256, 1])]; + tensor var_488 = reshape(shape = var_487, x = var_482)[name = string("op_488")]; + tensor var_493 = const()[name = string("op_493"), val = tensor([0, 1, 3, 2])]; + tensor var_503 = const()[name = string("op_503"), val = tensor([1, 8, 256])]; + tensor var_494 = transpose(perm = var_493, x = var_488)[name = string("transpose_100")]; + tensor x_1 = reshape(shape = var_503, x = var_494)[name = string("x_1")]; + int32 var_509 = const()[name = string("op_509"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_511 = mul(x = x_1, y = const_1_promoted)[name = string("op_511")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor 
input_5 = concat(axis = var_509, interleave = input_5_interleave_0, values = (x_1, var_511))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_506_to_fp16 = const()[name = string("op_506_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_506_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_516_split_sizes_0 = const()[name = string("op_516_split_sizes_0"), val = tensor([256, 256])]; + int32 var_516_axis_0 = const()[name = string("op_516_axis_0"), val = int32(-1)]; + tensor var_516_0, tensor var_516_1 = split(axis = var_516_axis_0, split_sizes = var_516_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_516")]; + tensor var_518 = mul(x = var_516_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_518")]; + tensor var_523 = const()[name = string("op_523"), val = tensor([1, 8, 1, 256])]; + tensor q_3 = reshape(shape = var_523, x = var_518)[name = string("q_3")]; + tensor var_525_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_525_cast_fp16")]; + tensor var_526_split_sizes_0 = const()[name = string("op_526_split_sizes_0"), val = tensor([128, 128])]; + int32 var_526_axis_0 = const()[name = string("op_526_axis_0"), val = int32(-1)]; + tensor var_526_0, tensor var_526_1 = split(axis = var_526_axis_0, split_sizes = var_526_split_sizes_0, x = q_3)[name = string("op_526")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_528 = mul(x = var_526_1, y = const_2_promoted)[name = string("op_528")]; + int32 var_530 = const()[name = string("op_530"), val = int32(-1)]; + bool var_531_interleave_0 = const()[name = string("op_531_interleave_0"), val = bool(false)]; + tensor var_531 = concat(axis = var_530, interleave = var_531_interleave_0, values = (var_528, var_526_0))[name = string("op_531")]; + tensor var_532_cast_fp16 = mul(x = var_531, y = 
sin_s)[name = string("op_532_cast_fp16")]; + tensor q_5_cast_fp16 = add(x = var_525_cast_fp16, y = var_532_cast_fp16)[name = string("q_5_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_99")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_98")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_97")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor 
transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_96")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_95")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_5_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_3_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")]; + tensor var_564 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_564")]; + tensor var_570 = exp(x = var_564)[name = string("op_570")]; + tensor var_580_axes_0 = const()[name = string("op_580_axes_0"), val = tensor([-1])]; + bool var_580_keep_dims_0 = const()[name = string("op_580_keep_dims_0"), val = bool(true)]; + tensor var_580 = reduce_sum(axes = var_580_axes_0, keep_dims = 
var_580_keep_dims_0, x = var_570)[name = string("op_580")]; + tensor var_586_cast_fp16 = real_div(x = var_570, y = var_580)[name = string("op_586_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_94")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_586_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_597 = const()[name = string("op_597"), val = tensor([0, 2, 1, 3])]; + tensor var_604 = const()[name = string("op_604"), val = tensor([1, 1, -1])]; + tensor var_598_cast_fp16 = transpose(perm = var_597, x = attn_output_1_cast_fp16)[name = string("transpose_93")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_604, x = var_598_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_609 = const()[name = string("op_609"), val = tensor([0, 2, 1])]; + string var_625_pad_type_0 = const()[name = string("op_625_pad_type_0"), val = string("valid")]; + int32 var_625_groups_0 = const()[name = string("op_625_groups_0"), val = int32(1)]; + tensor var_625_strides_0 = const()[name = string("op_625_strides_0"), val = tensor([1])]; + tensor var_625_pad_0 = const()[name = string("op_625_pad_0"), val = tensor([0, 0])]; + tensor var_625_dilations_0 = const()[name = string("op_625_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383347392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385968896))))[name = 
string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_610_cast_fp16 = transpose(perm = var_609, x = attn_output_3_cast_fp16)[name = string("transpose_92")]; + tensor var_625_cast_fp16 = conv(dilations = var_625_dilations_0, groups = var_625_groups_0, pad = var_625_pad_0, pad_type = var_625_pad_type_0, strides = var_625_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_610_cast_fp16)[name = string("op_625_cast_fp16")]; + tensor var_629 = const()[name = string("op_629"), val = tensor([0, 2, 1])]; + int32 var_635 = const()[name = string("op_635"), val = int32(-1)]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_7_cast_fp16 = transpose(perm = var_629, x = var_625_cast_fp16)[name = string("transpose_91")]; + tensor var_637_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_637_cast_fp16")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9_cast_fp16 = concat(axis = var_635, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_637_cast_fp16))[name = string("input_9_cast_fp16")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_632_to_fp16 = const()[name = string("op_632_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_632_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")]; + tensor var_642_split_sizes_0 = const()[name = string("op_642_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_642_axis_0 = const()[name = string("op_642_axis_0"), val = int32(-1)]; + tensor var_642_cast_fp16_0, tensor var_642_cast_fp16_1 = split(axis = var_642_axis_0, split_sizes = var_642_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_642_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = 
const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385971520)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_642_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")]; + int32 var_651 = const()[name = string("op_651"), val = int32(-1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_653_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_653_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_651, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_653_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_648_to_fp16 = const()[name = string("op_648_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_648_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_658_split_sizes_0 = const()[name = string("op_658_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_658_axis_0 = const()[name = string("op_658_axis_0"), val = int32(-1)]; + tensor var_658_cast_fp16_0, tensor var_658_cast_fp16_1 = split(axis = var_658_axis_0, split_sizes = var_658_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_658_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(385976704)))]; + tensor h_3_cast_fp16 = mul(x = var_658_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_669 = const()[name = string("op_669"), val = tensor([0, 2, 1])]; + tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_670 = transpose(perm = var_669, x = h_3_cast_fp16)[name = string("transpose_90")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_670)[name = string("input_13")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")]; + tensor var_710_axes_0 = const()[name = string("op_710_axes_0"), val = tensor([2])]; + tensor var_710 = squeeze(axes = var_710_axes_0, x = mlp_out_1)[name = string("op_710")]; + tensor var_714 = const()[name = string("op_714"), val = tensor([0, 2, 1])]; + int32 var_720 = const()[name = string("op_720"), val = int32(-1)]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor x_11 = transpose(perm = var_714, x = var_710)[name = string("transpose_89")]; + tensor var_722 = mul(x = x_11, y = const_5_promoted)[name = string("op_722")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17 = concat(axis = var_720, interleave = input_17_interleave_0, values = (x_11, var_722))[name = string("input_17")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_717_to_fp16 = const()[name = string("op_717_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes 
= normed_17_axes_0, epsilon = var_717_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")]; + tensor var_727_split_sizes_0 = const()[name = string("op_727_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_727_axis_0 = const()[name = string("op_727_axis_0"), val = int32(-1)]; + tensor var_727_0, tensor var_727_1 = split(axis = var_727_axis_0, split_sizes = var_727_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_727")]; + tensor hidden_states_3 = mul(x = var_727_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 6144])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 6400])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_755 = const()[name = string("op_755"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_756 = transpose(perm = var_755, x = hidden_states_5_cast_fp16)[name = string("transpose_88")]; + tensor input_19 = expand_dims(axes = input_19_axes_0, x = var_756)[name = string("input_19")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = 
const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_775 = const()[name = string("op_775"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_776_cast_fp16 = transpose(perm = var_775, x = per_layer_slice_1_cast_fp16)[name = string("transpose_87")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_776_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385981888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(386309632))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_792_axes_0 = const()[name = string("op_792_axes_0"), val = tensor([2])]; + tensor var_792_cast_fp16 = squeeze(axes = var_792_axes_0, x = gated_5_cast_fp16)[name = string("op_792_cast_fp16")]; + tensor var_796 = const()[name = string("op_796"), val = tensor([0, 2, 1])]; + int32 var_802 = const()[name = string("op_802"), val = int32(-1)]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_13_cast_fp16 = transpose(perm = var_796, x = var_792_cast_fp16)[name = string("transpose_86")]; + tensor var_804_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_804_cast_fp16")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23_cast_fp16 = concat(axis = var_802, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_804_cast_fp16))[name = string("input_23_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_799_to_fp16 = const()[name = string("op_799_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_799_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_809_split_sizes_0 = const()[name = string("op_809_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_809_axis_0 = const()[name = string("op_809_axis_0"), val = int32(-1)]; + tensor var_809_cast_fp16_0, tensor var_809_cast_fp16_1 = split(axis = var_809_axis_0, 
split_sizes = var_809_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_809_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386312256)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_809_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor([0x1.02p-1])]; + tensor x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")]; + int32 var_824 = const()[name = string("op_824"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_826_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_826_cast_fp16")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25_cast_fp16 = concat(axis = var_824, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_826_cast_fp16))[name = string("input_25_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_821_to_fp16 = const()[name = string("op_821_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_821_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_831_split_sizes_0 = const()[name = string("op_831_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_831_axis_0 = const()[name = 
string("op_831_axis_0"), val = int32(-1)]; + tensor var_831_cast_fp16_0, tensor var_831_cast_fp16_1 = split(axis = var_831_axis_0, split_sizes = var_831_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_831_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386317440)))]; + tensor h_7_cast_fp16 = mul(x = var_831_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_837 = const()[name = string("op_837"), val = tensor([0, 2, 1])]; + tensor var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor([2])]; + tensor var_838_cast_fp16 = transpose(perm = var_837, x = h_7_cast_fp16)[name = string("transpose_85")]; + tensor var_840_cast_fp16 = expand_dims(axes = var_840_axes_0, x = var_838_cast_fp16)[name = string("op_840_cast_fp16")]; + string var_856_pad_type_0 = const()[name = string("op_856_pad_type_0"), val = string("valid")]; + tensor var_856_strides_0 = const()[name = string("op_856_strides_0"), val = tensor([1, 1])]; + tensor var_856_pad_0 = const()[name = string("op_856_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_856_dilations_0 = const()[name = string("op_856_dilations_0"), val = tensor([1, 1])]; + int32 var_856_groups_0 = const()[name = string("op_856_groups_0"), val = int32(1)]; + tensor var_856 = conv(dilations = var_856_dilations_0, groups = var_856_groups_0, pad = var_856_pad_0, pad_type = var_856_pad_type_0, strides = var_856_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_840_cast_fp16)[name = string("op_856")]; + tensor var_861 = const()[name = string("op_861"), val = tensor([1, 8, 256, 1])]; + tensor var_862 = reshape(shape = var_861, x = var_856)[name = string("op_862")]; + tensor var_867 = const()[name = string("op_867"), val = tensor([0, 1, 3, 2])]; + tensor 
var_877 = const()[name = string("op_877"), val = tensor([1, 8, 256])]; + tensor var_868 = transpose(perm = var_867, x = var_862)[name = string("transpose_84")]; + tensor x_17 = reshape(shape = var_877, x = var_868)[name = string("x_17")]; + int32 var_883 = const()[name = string("op_883"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor var_885 = mul(x = x_17, y = const_9_promoted)[name = string("op_885")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29 = concat(axis = var_883, interleave = input_29_interleave_0, values = (x_17, var_885))[name = string("input_29")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_880_to_fp16 = const()[name = string("op_880_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_880_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")]; + tensor var_890_split_sizes_0 = const()[name = string("op_890_split_sizes_0"), val = tensor([256, 256])]; + int32 var_890_axis_0 = const()[name = string("op_890_axis_0"), val = int32(-1)]; + tensor var_890_0, tensor var_890_1 = split(axis = var_890_axis_0, split_sizes = var_890_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_890")]; + tensor var_892 = mul(x = var_890_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_892")]; + tensor var_897 = const()[name = string("op_897"), val = tensor([1, 8, 1, 256])]; + tensor q_9 = reshape(shape = var_897, x = var_892)[name = string("q_9")]; + tensor var_899_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_899_cast_fp16")]; + tensor var_900_split_sizes_0 = const()[name = string("op_900_split_sizes_0"), val = tensor([128, 128])]; + int32 var_900_axis_0 = const()[name = string("op_900_axis_0"), val = int32(-1)]; + tensor var_900_0, tensor var_900_1 = split(axis = var_900_axis_0, 
split_sizes = var_900_split_sizes_0, x = q_9)[name = string("op_900")]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor var_902 = mul(x = var_900_1, y = const_10_promoted)[name = string("op_902")]; + int32 var_904 = const()[name = string("op_904"), val = int32(-1)]; + bool var_905_interleave_0 = const()[name = string("op_905_interleave_0"), val = bool(false)]; + tensor var_905 = concat(axis = var_904, interleave = var_905_interleave_0, values = (var_902, var_900_0))[name = string("op_905")]; + tensor var_906_cast_fp16 = mul(x = var_905, y = sin_s)[name = string("op_906_cast_fp16")]; + tensor q_11_cast_fp16 = add(x = var_899_cast_fp16, y = var_906_cast_fp16)[name = string("q_11_cast_fp16")]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_11_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")]; + tensor var_938 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_938")]; + tensor var_944 = exp(x = var_938)[name = string("op_944")]; + tensor var_954_axes_0 = const()[name = string("op_954_axes_0"), val = tensor([-1])]; + bool var_954_keep_dims_0 = const()[name = string("op_954_keep_dims_0"), val = 
bool(true)]; + tensor var_954 = reduce_sum(axes = var_954_axes_0, keep_dims = var_954_keep_dims_0, x = var_944)[name = string("op_954")]; + tensor var_960_cast_fp16 = real_div(x = var_944, y = var_954)[name = string("op_960_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_960_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_971 = const()[name = string("op_971"), val = tensor([0, 2, 1, 3])]; + tensor var_978 = const()[name = string("op_978"), val = tensor([1, 1, -1])]; + tensor var_972_cast_fp16 = transpose(perm = var_971, x = attn_output_7_cast_fp16)[name = string("transpose_83")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_978, x = var_972_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_983 = const()[name = string("op_983"), val = tensor([0, 2, 1])]; + string var_999_pad_type_0 = const()[name = string("op_999_pad_type_0"), val = string("valid")]; + int32 var_999_groups_0 = const()[name = string("op_999_groups_0"), val = int32(1)]; + tensor var_999_strides_0 = const()[name = string("op_999_strides_0"), val = tensor([1])]; + tensor var_999_pad_0 = const()[name = string("op_999_pad_0"), val = tensor([0, 0])]; + tensor var_999_dilations_0 = const()[name = string("op_999_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386322624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388944128))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor 
var_984_cast_fp16 = transpose(perm = var_983, x = attn_output_9_cast_fp16)[name = string("transpose_82")]; + tensor var_999_cast_fp16 = conv(dilations = var_999_dilations_0, groups = var_999_groups_0, pad = var_999_pad_0, pad_type = var_999_pad_type_0, strides = var_999_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_984_cast_fp16)[name = string("op_999_cast_fp16")]; + tensor var_1003 = const()[name = string("op_1003"), val = tensor([0, 2, 1])]; + int32 var_1009 = const()[name = string("op_1009"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_23_cast_fp16 = transpose(perm = var_1003, x = var_999_cast_fp16)[name = string("transpose_81")]; + tensor var_1011_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1011_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_1009, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1011_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1006_to_fp16 = const()[name = string("op_1006_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1006_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1016_split_sizes_0 = const()[name = string("op_1016_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1016_axis_0 = const()[name = string("op_1016_axis_0"), val = int32(-1)]; + tensor var_1016_cast_fp16_0, tensor var_1016_cast_fp16_1 = split(axis = var_1016_axis_0, split_sizes = var_1016_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1016_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388946752)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1016_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_1025 = const()[name = string("op_1025"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1027_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1027_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1025, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1027_cast_fp16))[name = string("input_35_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1022_to_fp16 = const()[name = string("op_1022_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1022_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor var_1032_split_sizes_0 = const()[name = string("op_1032_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1032_axis_0 = const()[name = string("op_1032_axis_0"), val = int32(-1)]; + tensor var_1032_cast_fp16_0, tensor var_1032_cast_fp16_1 = split(axis = var_1032_axis_0, split_sizes = var_1032_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1032_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(388951936)))]; + tensor h_9_cast_fp16 = mul(x = var_1032_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1043 = const()[name = string("op_1043"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_1044 = transpose(perm = var_1043, x = h_9_cast_fp16)[name = string("transpose_80")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1044)[name = string("input_37")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), 
val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")]; + tensor var_1084_axes_0 = const()[name = string("op_1084_axes_0"), val = tensor([2])]; + tensor var_1084 = squeeze(axes = var_1084_axes_0, x = mlp_out_3)[name = string("op_1084")]; + tensor var_1088 = const()[name = string("op_1088"), val = tensor([0, 2, 1])]; + int32 var_1094 = const()[name = string("op_1094"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor x_27 = transpose(perm = var_1088, x = var_1084)[name = string("transpose_79")]; + tensor var_1096 = mul(x = x_27, y = const_13_promoted)[name = string("op_1096")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1094, interleave = input_41_interleave_0, values = (x_27, var_1096))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1091_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1101_split_sizes_0 = const()[name = string("op_1101_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1101_axis_0 = const()[name = string("op_1101_axis_0"), val = int32(-1)]; + tensor var_1101_0, tensor var_1101_1 = split(axis = var_1101_axis_0, split_sizes = var_1101_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1101")]; + tensor hidden_states_13 = mul(x = var_1101_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 6400])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 6656])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([0, 2, 1])]; + tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; + tensor var_1130 = transpose(perm = var_1129, x = hidden_states_15_cast_fp16)[name = string("transpose_78")]; + tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1130)[name = string("input_43")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1149 = const()[name = string("op_1149"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1150_cast_fp16 = transpose(perm = var_1149, x = per_layer_slice_3_cast_fp16)[name = string("transpose_77")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1150_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388957120))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(389284864))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1166_axes_0 = const()[name = string("op_1166_axes_0"), val = tensor([2])]; + tensor var_1166_cast_fp16 = squeeze(axes = var_1166_axes_0, x = gated_11_cast_fp16)[name = string("op_1166_cast_fp16")]; + tensor var_1170 = const()[name = string("op_1170"), val = tensor([0, 2, 1])]; + int32 var_1176 = const()[name = string("op_1176"), val = int32(-1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_29_cast_fp16 = transpose(perm = var_1170, x = var_1166_cast_fp16)[name = string("transpose_76")]; + tensor var_1178_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1178_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_1176, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1178_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1173_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1183_split_sizes_0 = const()[name = string("op_1183_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1183_axis_0 = const()[name = string("op_1183_axis_0"), val = int32(-1)]; + tensor 
var_1183_cast_fp16_0, tensor var_1183_cast_fp16_1 = split(axis = var_1183_axis_0, split_sizes = var_1183_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1183_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389287488)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1183_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor([0x1.6ep-1])]; + tensor x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")]; + int32 var_1198 = const()[name = string("op_1198"), val = int32(-1)]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1200_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1200_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1198, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1200_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1195_to_fp16 = const()[name = string("op_1195_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1195_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1205_split_sizes_0 = const()[name = 
string("op_1205_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1205_axis_0 = const()[name = string("op_1205_axis_0"), val = int32(-1)]; + tensor var_1205_cast_fp16_0, tensor var_1205_cast_fp16_1 = split(axis = var_1205_axis_0, split_sizes = var_1205_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1205_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389292672)))]; + tensor h_13_cast_fp16 = mul(x = var_1205_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1211 = const()[name = string("op_1211"), val = tensor([0, 2, 1])]; + tensor var_1214_axes_0 = const()[name = string("op_1214_axes_0"), val = tensor([2])]; + tensor var_1212_cast_fp16 = transpose(perm = var_1211, x = h_13_cast_fp16)[name = string("transpose_75")]; + tensor var_1214_cast_fp16 = expand_dims(axes = var_1214_axes_0, x = var_1212_cast_fp16)[name = string("op_1214_cast_fp16")]; + string var_1230_pad_type_0 = const()[name = string("op_1230_pad_type_0"), val = string("valid")]; + tensor var_1230_strides_0 = const()[name = string("op_1230_strides_0"), val = tensor([1, 1])]; + tensor var_1230_pad_0 = const()[name = string("op_1230_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1230_dilations_0 = const()[name = string("op_1230_dilations_0"), val = tensor([1, 1])]; + int32 var_1230_groups_0 = const()[name = string("op_1230_groups_0"), val = int32(1)]; + tensor var_1230 = conv(dilations = var_1230_dilations_0, groups = var_1230_groups_0, pad = var_1230_pad_0, pad_type = var_1230_pad_type_0, strides = var_1230_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1214_cast_fp16)[name = string("op_1230")]; + tensor var_1235 = const()[name = string("op_1235"), val = tensor([1, 8, 256, 1])]; + tensor var_1236 = reshape(shape 
= var_1235, x = var_1230)[name = string("op_1236")]; + tensor var_1241 = const()[name = string("op_1241"), val = tensor([0, 1, 3, 2])]; + tensor var_1251 = const()[name = string("op_1251"), val = tensor([1, 8, 256])]; + tensor var_1242 = transpose(perm = var_1241, x = var_1236)[name = string("transpose_74")]; + tensor x_33 = reshape(shape = var_1251, x = var_1242)[name = string("x_33")]; + int32 var_1257 = const()[name = string("op_1257"), val = int32(-1)]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1259 = mul(x = x_33, y = const_17_promoted)[name = string("op_1259")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1257, interleave = input_53_interleave_0, values = (x_33, var_1259))[name = string("input_53")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1254_to_fp16 = const()[name = string("op_1254_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1254_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")]; + tensor var_1264_split_sizes_0 = const()[name = string("op_1264_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1264_axis_0 = const()[name = string("op_1264_axis_0"), val = int32(-1)]; + tensor var_1264_0, tensor var_1264_1 = split(axis = var_1264_axis_0, split_sizes = var_1264_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1264")]; + tensor var_1266 = mul(x = var_1264_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1266")]; + tensor var_1271 = const()[name = string("op_1271"), val = tensor([1, 8, 1, 256])]; + tensor q_15 = reshape(shape = var_1271, x = var_1266)[name = string("q_15")]; + tensor var_1273_cast_fp16 = mul(x = q_15, y = cos_s)[name = string("op_1273_cast_fp16")]; + tensor var_1274_split_sizes_0 = const()[name = 
string("op_1274_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1274_axis_0 = const()[name = string("op_1274_axis_0"), val = int32(-1)]; + tensor var_1274_0, tensor var_1274_1 = split(axis = var_1274_axis_0, split_sizes = var_1274_split_sizes_0, x = q_15)[name = string("op_1274")]; + fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; + tensor var_1276 = mul(x = var_1274_1, y = const_18_promoted)[name = string("op_1276")]; + int32 var_1278 = const()[name = string("op_1278"), val = int32(-1)]; + bool var_1279_interleave_0 = const()[name = string("op_1279_interleave_0"), val = bool(false)]; + tensor var_1279 = concat(axis = var_1278, interleave = var_1279_interleave_0, values = (var_1276, var_1274_0))[name = string("op_1279")]; + tensor var_1280_cast_fp16 = mul(x = var_1279, y = sin_s)[name = string("op_1280_cast_fp16")]; + tensor q_17_cast_fp16 = add(x = var_1273_cast_fp16, y = var_1280_cast_fp16)[name = string("q_17_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_17_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_35_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")]; + tensor var_1312 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = 
string("op_1312")]; + tensor var_1318 = exp(x = var_1312)[name = string("op_1318")]; + tensor var_1328_axes_0 = const()[name = string("op_1328_axes_0"), val = tensor([-1])]; + bool var_1328_keep_dims_0 = const()[name = string("op_1328_keep_dims_0"), val = bool(true)]; + tensor var_1328 = reduce_sum(axes = var_1328_axes_0, keep_dims = var_1328_keep_dims_0, x = var_1318)[name = string("op_1328")]; + tensor var_1334_cast_fp16 = real_div(x = var_1318, y = var_1328)[name = string("op_1334_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1334_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_1345 = const()[name = string("op_1345"), val = tensor([0, 2, 1, 3])]; + tensor var_1352 = const()[name = string("op_1352"), val = tensor([1, 1, -1])]; + tensor var_1346_cast_fp16 = transpose(perm = var_1345, x = attn_output_13_cast_fp16)[name = string("transpose_73")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_1352, x = var_1346_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_1357 = const()[name = string("op_1357"), val = tensor([0, 2, 1])]; + string var_1373_pad_type_0 = const()[name = string("op_1373_pad_type_0"), val = string("valid")]; + int32 var_1373_groups_0 = const()[name = string("op_1373_groups_0"), val = int32(1)]; + tensor var_1373_strides_0 = const()[name = string("op_1373_strides_0"), val = tensor([1])]; + tensor var_1373_pad_0 = const()[name = string("op_1373_pad_0"), val = tensor([0, 0])]; + tensor var_1373_dilations_0 = const()[name = string("op_1373_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389297856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391919360))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1358_cast_fp16 = transpose(perm = var_1357, x = attn_output_15_cast_fp16)[name = string("transpose_72")]; + tensor var_1373_cast_fp16 = conv(dilations = var_1373_dilations_0, groups = var_1373_groups_0, pad = var_1373_pad_0, pad_type = var_1373_pad_type_0, strides = var_1373_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1358_cast_fp16)[name = string("op_1373_cast_fp16")]; + tensor var_1377 = const()[name = string("op_1377"), val = tensor([0, 2, 1])]; + int32 var_1383 = const()[name = string("op_1383"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_39_cast_fp16 = transpose(perm = var_1377, x = var_1373_cast_fp16)[name = string("transpose_71")]; + tensor var_1385_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1385_cast_fp16")]; + bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57_cast_fp16 = concat(axis = var_1383, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1385_cast_fp16))[name = string("input_57_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1380_to_fp16 = const()[name = string("op_1380_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1380_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1390_split_sizes_0 = const()[name = string("op_1390_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1390_axis_0 = const()[name = string("op_1390_axis_0"), 
val = int32(-1)]; + tensor var_1390_cast_fp16_0, tensor var_1390_cast_fp16_1 = split(axis = var_1390_axis_0, split_sizes = var_1390_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1390_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391921984)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_1390_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")]; + int32 var_1399 = const()[name = string("op_1399"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1401_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1401_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_1399, interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1401_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1396_to_fp16 = const()[name = string("op_1396_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1396_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_1406_split_sizes_0 = const()[name = string("op_1406_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1406_axis_0 = const()[name = string("op_1406_axis_0"), val = int32(-1)]; + tensor var_1406_cast_fp16_0, tensor var_1406_cast_fp16_1 = split(axis = var_1406_axis_0, split_sizes = var_1406_split_sizes_0, x = 
normed_61_cast_fp16)[name = string("op_1406_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391927168)))]; + tensor h_15_cast_fp16 = mul(x = var_1406_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_1417 = const()[name = string("op_1417"), val = tensor([0, 2, 1])]; + tensor input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor([2])]; + tensor var_1418 = transpose(perm = var_1417, x = h_15_cast_fp16)[name = string("transpose_70")]; + tensor input_61 = expand_dims(axes = input_61_axes_0, x = var_1418)[name = string("input_61")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + 
tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")]; + tensor var_1458_axes_0 = const()[name = string("op_1458_axes_0"), val = tensor([2])]; + tensor var_1458 = squeeze(axes = var_1458_axes_0, x = mlp_out_5)[name = string("op_1458")]; + tensor var_1462 = const()[name = string("op_1462"), val = tensor([0, 2, 1])]; + int32 var_1468 = const()[name = string("op_1468"), val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_43 = transpose(perm = var_1462, x = var_1458)[name = string("transpose_69")]; + tensor var_1470 = mul(x = x_43, y = const_21_promoted)[name = string("op_1470")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = 
var_1468, interleave = input_65_interleave_0, values = (x_43, var_1470))[name = string("input_65")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1465_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")]; + tensor var_1475_split_sizes_0 = const()[name = string("op_1475_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1475_axis_0 = const()[name = string("op_1475_axis_0"), val = int32(-1)]; + tensor var_1475_0, tensor var_1475_1 = split(axis = var_1475_axis_0, split_sizes = var_1475_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1475")]; + tensor hidden_states_23 = mul(x = var_1475_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 6656])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 6912])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_1503 = const()[name = string("op_1503"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_1504 = transpose(perm = var_1503, x = hidden_states_25_cast_fp16)[name = string("transpose_68")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = 
var_1504)[name = string("input_67")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_1523 = const()[name = string("op_1523"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_1524_cast_fp16 = transpose(perm = var_1523, x = per_layer_slice_5_cast_fp16)[name = string("transpose_67")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1524_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 
1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391932352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392260096))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_1540_axes_0 = const()[name = string("op_1540_axes_0"), val = tensor([2])]; + tensor var_1540_cast_fp16 = squeeze(axes = var_1540_axes_0, x = gated_17_cast_fp16)[name = string("op_1540_cast_fp16")]; + tensor var_1544 = const()[name = string("op_1544"), val = tensor([0, 2, 1])]; + int32 var_1550 = const()[name = string("op_1550"), val = int32(-1)]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_45_cast_fp16 = transpose(perm = var_1544, x = var_1540_cast_fp16)[name = string("transpose_66")]; + tensor var_1552_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1552_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_1550, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1552_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = 
layer_norm(axes = normed_69_axes_0, epsilon = var_1547_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_1557_split_sizes_0 = const()[name = string("op_1557_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1557_axis_0 = const()[name = string("op_1557_axis_0"), val = int32(-1)]; + tensor var_1557_cast_fp16_0, tensor var_1557_cast_fp16_1 = split(axis = var_1557_axis_0, split_sizes = var_1557_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1557_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392262720)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_1557_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor([0x1.6ep-1])]; + tensor x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")]; + int32 var_1572 = const()[name = string("op_1572"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1574_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1574_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_1572, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1574_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = 
tensor([-1])]; + fp16 var_1569_to_fp16 = const()[name = string("op_1569_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1569_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_1579_split_sizes_0 = const()[name = string("op_1579_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1579_axis_0 = const()[name = string("op_1579_axis_0"), val = int32(-1)]; + tensor var_1579_cast_fp16_0, tensor var_1579_cast_fp16_1 = split(axis = var_1579_axis_0, split_sizes = var_1579_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1579_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392267904)))]; + tensor h_19_cast_fp16 = mul(x = var_1579_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_1585 = const()[name = string("op_1585"), val = tensor([0, 2, 1])]; + tensor var_1588_axes_0 = const()[name = string("op_1588_axes_0"), val = tensor([2])]; + tensor var_1586_cast_fp16 = transpose(perm = var_1585, x = h_19_cast_fp16)[name = string("transpose_65")]; + tensor var_1588_cast_fp16 = expand_dims(axes = var_1588_axes_0, x = var_1586_cast_fp16)[name = string("op_1588_cast_fp16")]; + string var_1604_pad_type_0 = const()[name = string("op_1604_pad_type_0"), val = string("valid")]; + tensor var_1604_strides_0 = const()[name = string("op_1604_strides_0"), val = tensor([1, 1])]; + tensor var_1604_pad_0 = const()[name = string("op_1604_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1604_dilations_0 = const()[name = string("op_1604_dilations_0"), val = tensor([1, 1])]; + int32 var_1604_groups_0 = const()[name = string("op_1604_groups_0"), val = int32(1)]; + tensor var_1604 = conv(dilations = var_1604_dilations_0, groups = 
var_1604_groups_0, pad = var_1604_pad_0, pad_type = var_1604_pad_type_0, strides = var_1604_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1588_cast_fp16)[name = string("op_1604")]; + tensor var_1609 = const()[name = string("op_1609"), val = tensor([1, 8, 256, 1])]; + tensor var_1610 = reshape(shape = var_1609, x = var_1604)[name = string("op_1610")]; + tensor var_1615 = const()[name = string("op_1615"), val = tensor([0, 1, 3, 2])]; + tensor var_1625 = const()[name = string("op_1625"), val = tensor([1, 8, 256])]; + tensor var_1616 = transpose(perm = var_1615, x = var_1610)[name = string("transpose_64")]; + tensor x_49 = reshape(shape = var_1625, x = var_1616)[name = string("x_49")]; + int32 var_1631 = const()[name = string("op_1631"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_1633 = mul(x = x_49, y = const_25_promoted)[name = string("op_1633")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_1631, interleave = input_77_interleave_0, values = (x_49, var_1633))[name = string("input_77")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1628_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")]; + tensor var_1638_split_sizes_0 = const()[name = string("op_1638_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1638_axis_0 = const()[name = string("op_1638_axis_0"), val = int32(-1)]; + tensor var_1638_0, tensor var_1638_1 = split(axis = var_1638_axis_0, split_sizes = var_1638_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1638")]; + tensor var_1640 = mul(x = var_1638_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1640")]; + 
tensor var_1645 = const()[name = string("op_1645"), val = tensor([1, 8, 1, 256])]; + tensor q_21 = reshape(shape = var_1645, x = var_1640)[name = string("q_21")]; + tensor var_1647_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1647_cast_fp16")]; + tensor var_1648_split_sizes_0 = const()[name = string("op_1648_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1648_axis_0 = const()[name = string("op_1648_axis_0"), val = int32(-1)]; + tensor var_1648_0, tensor var_1648_1 = split(axis = var_1648_axis_0, split_sizes = var_1648_split_sizes_0, x = q_21)[name = string("op_1648")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_1650 = mul(x = var_1648_1, y = const_26_promoted)[name = string("op_1650")]; + int32 var_1652 = const()[name = string("op_1652"), val = int32(-1)]; + bool var_1653_interleave_0 = const()[name = string("op_1653_interleave_0"), val = bool(false)]; + tensor var_1653 = concat(axis = var_1652, interleave = var_1653_interleave_0, values = (var_1650, var_1648_0))[name = string("op_1653")]; + tensor var_1654_cast_fp16 = mul(x = var_1653, y = sin_s)[name = string("op_1654_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_1647_cast_fp16, y = var_1654_cast_fp16)[name = string("q_23_cast_fp16")]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_23_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool 
reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")]; + tensor var_1686 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1686")]; + tensor var_1692 = exp(x = var_1686)[name = string("op_1692")]; + tensor var_1702_axes_0 = const()[name = string("op_1702_axes_0"), val = tensor([-1])]; + bool var_1702_keep_dims_0 = const()[name = string("op_1702_keep_dims_0"), val = bool(true)]; + tensor var_1702 = reduce_sum(axes = var_1702_axes_0, keep_dims = var_1702_keep_dims_0, x = var_1692)[name = string("op_1702")]; + tensor var_1708_cast_fp16 = real_div(x = var_1692, y = var_1702)[name = string("op_1708_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_1708_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_1719 = const()[name = string("op_1719"), val = tensor([0, 2, 1, 3])]; + tensor var_1726 = const()[name = string("op_1726"), val = tensor([1, 1, -1])]; + tensor var_1720_cast_fp16 = transpose(perm = var_1719, x = attn_output_19_cast_fp16)[name = string("transpose_63")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_1726, x = var_1720_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1731 = const()[name = string("op_1731"), val = tensor([0, 2, 1])]; + string var_1747_pad_type_0 = const()[name = string("op_1747_pad_type_0"), val = string("valid")]; + int32 var_1747_groups_0 = const()[name = string("op_1747_groups_0"), val = int32(1)]; + tensor var_1747_strides_0 = const()[name 
= string("op_1747_strides_0"), val = tensor([1])]; + tensor var_1747_pad_0 = const()[name = string("op_1747_pad_0"), val = tensor([0, 0])]; + tensor var_1747_dilations_0 = const()[name = string("op_1747_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392273088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394894592))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1732_cast_fp16 = transpose(perm = var_1731, x = attn_output_21_cast_fp16)[name = string("transpose_62")]; + tensor var_1747_cast_fp16 = conv(dilations = var_1747_dilations_0, groups = var_1747_groups_0, pad = var_1747_pad_0, pad_type = var_1747_pad_type_0, strides = var_1747_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1732_cast_fp16)[name = string("op_1747_cast_fp16")]; + tensor var_1751 = const()[name = string("op_1751"), val = tensor([0, 2, 1])]; + int32 var_1757 = const()[name = string("op_1757"), val = int32(-1)]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_55_cast_fp16 = transpose(perm = var_1751, x = var_1747_cast_fp16)[name = string("transpose_61")]; + tensor var_1759_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1759_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_1757, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1759_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_1754_to_fp16 = const()[name = string("op_1754_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = 
layer_norm(axes = normed_81_axes_0, epsilon = var_1754_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_1764_split_sizes_0 = const()[name = string("op_1764_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1764_axis_0 = const()[name = string("op_1764_axis_0"), val = int32(-1)]; + tensor var_1764_cast_fp16_0, tensor var_1764_cast_fp16_1 = split(axis = var_1764_axis_0, split_sizes = var_1764_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1764_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394897216)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_1764_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")]; + int32 var_1773 = const()[name = string("op_1773"), val = int32(-1)]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1775_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1775_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_1773, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1775_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_1770_to_fp16 = const()[name = string("op_1770_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1770_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor 
var_1780_split_sizes_0 = const()[name = string("op_1780_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1780_axis_0 = const()[name = string("op_1780_axis_0"), val = int32(-1)]; + tensor var_1780_cast_fp16_0, tensor var_1780_cast_fp16_1 = split(axis = var_1780_axis_0, split_sizes = var_1780_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1780_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394902400)))]; + tensor h_21_cast_fp16 = mul(x = var_1780_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_1791 = const()[name = string("op_1791"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_1792 = transpose(perm = var_1791, x = h_21_cast_fp16)[name = string("transpose_60")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_1792)[name = string("input_85")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor 
up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")]; + tensor var_1832_axes_0 = const()[name = string("op_1832_axes_0"), val = tensor([2])]; + tensor var_1832 = squeeze(axes = var_1832_axes_0, x = mlp_out_7)[name = string("op_1832")]; + tensor var_1836 = const()[name = string("op_1836"), val = tensor([0, 2, 1])]; + int32 var_1842 = const()[name = string("op_1842"), val = int32(-1)]; + fp16 const_29_promoted = const()[name = 
string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor x_59 = transpose(perm = var_1836, x = var_1832)[name = string("transpose_59")]; + tensor var_1844 = mul(x = x_59, y = const_29_promoted)[name = string("op_1844")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89 = concat(axis = var_1842, interleave = input_89_interleave_0, values = (x_59, var_1844))[name = string("input_89")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1839_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")]; + tensor var_1849_split_sizes_0 = const()[name = string("op_1849_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1849_axis_0 = const()[name = string("op_1849_axis_0"), val = int32(-1)]; + tensor var_1849_0, tensor var_1849_1 = split(axis = var_1849_axis_0, split_sizes = var_1849_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1849")]; + tensor hidden_states_33 = mul(x = var_1849_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 6912])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 7168])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = 
string("per_layer_slice_7_cast_fp16")]; + tensor var_1877 = const()[name = string("op_1877"), val = tensor([0, 2, 1])]; + tensor input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor([2])]; + tensor var_1878 = transpose(perm = var_1877, x = hidden_states_35_cast_fp16)[name = string("transpose_58")]; + tensor input_91 = expand_dims(axes = input_91_axes_0, x = var_1878)[name = string("input_91")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_1897 = const()[name = string("op_1897"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_1898_cast_fp16 = transpose(perm = var_1897, x = per_layer_slice_7_cast_fp16)[name = string("transpose_57")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1898_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")]; + string 
gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395235328))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_1914_axes_0 = const()[name = string("op_1914_axes_0"), val = tensor([2])]; + tensor var_1914_cast_fp16 = squeeze(axes = var_1914_axes_0, x = gated_23_cast_fp16)[name = string("op_1914_cast_fp16")]; + tensor var_1918 = const()[name = string("op_1918"), val = tensor([0, 2, 1])]; + int32 var_1924 = const()[name = string("op_1924"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_61_cast_fp16 = transpose(perm = var_1918, x = var_1914_cast_fp16)[name = string("transpose_56")]; + tensor var_1926_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1926_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor 
input_95_cast_fp16 = concat(axis = var_1924, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1926_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_1921_to_fp16 = const()[name = string("op_1921_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1921_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor var_1931_split_sizes_0 = const()[name = string("op_1931_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1931_axis_0 = const()[name = string("op_1931_axis_0"), val = int32(-1)]; + tensor var_1931_cast_fp16_0, tensor var_1931_cast_fp16_1 = split(axis = var_1931_axis_0, split_sizes = var_1931_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1931_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395237952)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_1931_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor([0x1.62p-1])]; + tensor x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")]; + int32 var_1946 = const()[name = string("op_1946"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1948_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = 
string("op_1948_cast_fp16")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97_cast_fp16 = concat(axis = var_1946, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_1948_cast_fp16))[name = string("input_97_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_1943_to_fp16 = const()[name = string("op_1943_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_1943_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_1953_split_sizes_0 = const()[name = string("op_1953_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1953_axis_0 = const()[name = string("op_1953_axis_0"), val = int32(-1)]; + tensor var_1953_cast_fp16_0, tensor var_1953_cast_fp16_1 = split(axis = var_1953_axis_0, split_sizes = var_1953_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_1953_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395243136)))]; + tensor h_25_cast_fp16 = mul(x = var_1953_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_1959 = const()[name = string("op_1959"), val = tensor([0, 2, 1])]; + tensor var_1962_axes_0 = const()[name = string("op_1962_axes_0"), val = tensor([2])]; + tensor var_1960_cast_fp16 = transpose(perm = var_1959, x = h_25_cast_fp16)[name = string("transpose_55")]; + tensor var_1962_cast_fp16 = expand_dims(axes = var_1962_axes_0, x = var_1960_cast_fp16)[name = string("op_1962_cast_fp16")]; + string var_1978_pad_type_0 = const()[name = string("op_1978_pad_type_0"), val = string("valid")]; + tensor var_1978_strides_0 = const()[name = string("op_1978_strides_0"), val = 
tensor([1, 1])]; + tensor var_1978_pad_0 = const()[name = string("op_1978_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1978_dilations_0 = const()[name = string("op_1978_dilations_0"), val = tensor([1, 1])]; + int32 var_1978_groups_0 = const()[name = string("op_1978_groups_0"), val = int32(1)]; + tensor var_1978 = conv(dilations = var_1978_dilations_0, groups = var_1978_groups_0, pad = var_1978_pad_0, pad_type = var_1978_pad_type_0, strides = var_1978_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_1962_cast_fp16)[name = string("op_1978")]; + tensor var_1983 = const()[name = string("op_1983"), val = tensor([1, 8, 256, 1])]; + tensor var_1984 = reshape(shape = var_1983, x = var_1978)[name = string("op_1984")]; + tensor var_1989 = const()[name = string("op_1989"), val = tensor([0, 1, 3, 2])]; + tensor var_1999 = const()[name = string("op_1999"), val = tensor([1, 8, 256])]; + tensor var_1990 = transpose(perm = var_1989, x = var_1984)[name = string("transpose_54")]; + tensor x_65 = reshape(shape = var_1999, x = var_1990)[name = string("x_65")]; + int32 var_2005 = const()[name = string("op_2005"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor var_2007 = mul(x = x_65, y = const_33_promoted)[name = string("op_2007")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2005, interleave = input_101_interleave_0, values = (x_65, var_2007))[name = string("input_101")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2002_to_fp16 = const()[name = string("op_2002_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2002_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")]; + tensor var_2012_split_sizes_0 = const()[name = string("op_2012_split_sizes_0"), val = 
tensor([256, 256])]; + int32 var_2012_axis_0 = const()[name = string("op_2012_axis_0"), val = int32(-1)]; + tensor var_2012_0, tensor var_2012_1 = split(axis = var_2012_axis_0, split_sizes = var_2012_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2012")]; + tensor var_2014 = mul(x = var_2012_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2014")]; + tensor var_2019 = const()[name = string("op_2019"), val = tensor([1, 8, 1, 256])]; + tensor q_27 = reshape(shape = var_2019, x = var_2014)[name = string("q_27")]; + tensor var_2021_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2021_cast_fp16")]; + tensor var_2022_split_sizes_0 = const()[name = string("op_2022_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2022_axis_0 = const()[name = string("op_2022_axis_0"), val = int32(-1)]; + tensor var_2022_0, tensor var_2022_1 = split(axis = var_2022_axis_0, split_sizes = var_2022_split_sizes_0, x = q_27)[name = string("op_2022")]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor var_2024 = mul(x = var_2022_1, y = const_34_promoted)[name = string("op_2024")]; + int32 var_2026 = const()[name = string("op_2026"), val = int32(-1)]; + bool var_2027_interleave_0 = const()[name = string("op_2027_interleave_0"), val = bool(false)]; + tensor var_2027 = concat(axis = var_2026, interleave = var_2027_interleave_0, values = (var_2024, var_2022_0))[name = string("op_2027")]; + tensor var_2028_cast_fp16 = mul(x = var_2027, y = sin_s)[name = string("op_2028_cast_fp16")]; + tensor q_29_cast_fp16 = add(x = var_2021_cast_fp16, y = var_2028_cast_fp16)[name = string("q_29_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, 
transpose_y = attn_weights_17_transpose_y_0, x = q_29_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")]; + tensor var_2060 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2060")]; + tensor var_2066 = exp(x = var_2060)[name = string("op_2066")]; + tensor var_2076_axes_0 = const()[name = string("op_2076_axes_0"), val = tensor([-1])]; + bool var_2076_keep_dims_0 = const()[name = string("op_2076_keep_dims_0"), val = bool(true)]; + tensor var_2076 = reduce_sum(axes = var_2076_axes_0, keep_dims = var_2076_keep_dims_0, x = var_2066)[name = string("op_2076")]; + tensor var_2082_cast_fp16 = real_div(x = var_2066, y = var_2076)[name = string("op_2082_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_2082_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2093 = const()[name = string("op_2093"), val = tensor([0, 2, 1, 3])]; + tensor var_2100 = const()[name = string("op_2100"), val = tensor([1, 1, -1])]; + tensor var_2094_cast_fp16 = transpose(perm = var_2093, x = attn_output_25_cast_fp16)[name = string("transpose_53")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_2100, x = 
var_2094_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_2105 = const()[name = string("op_2105"), val = tensor([0, 2, 1])]; + string var_2121_pad_type_0 = const()[name = string("op_2121_pad_type_0"), val = string("valid")]; + int32 var_2121_groups_0 = const()[name = string("op_2121_groups_0"), val = int32(1)]; + tensor var_2121_strides_0 = const()[name = string("op_2121_strides_0"), val = tensor([1])]; + tensor var_2121_pad_0 = const()[name = string("op_2121_pad_0"), val = tensor([0, 0])]; + tensor var_2121_dilations_0 = const()[name = string("op_2121_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395248320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397869824))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2106_cast_fp16 = transpose(perm = var_2105, x = attn_output_27_cast_fp16)[name = string("transpose_52")]; + tensor var_2121_cast_fp16 = conv(dilations = var_2121_dilations_0, groups = var_2121_groups_0, pad = var_2121_pad_0, pad_type = var_2121_pad_type_0, strides = var_2121_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2106_cast_fp16)[name = string("op_2121_cast_fp16")]; + tensor var_2125 = const()[name = string("op_2125"), val = tensor([0, 2, 1])]; + int32 var_2131 = const()[name = string("op_2131"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2125, x = var_2121_cast_fp16)[name = string("transpose_51")]; + tensor var_2133_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2133_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor 
input_105_cast_fp16 = concat(axis = var_2131, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2133_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2128_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor var_2138_split_sizes_0 = const()[name = string("op_2138_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2138_axis_0 = const()[name = string("op_2138_axis_0"), val = int32(-1)]; + tensor var_2138_cast_fp16_0, tensor var_2138_cast_fp16_1 = split(axis = var_2138_axis_0, split_sizes = var_2138_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2138_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397872448)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_2138_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2147 = const()[name = string("op_2147"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2149_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2149_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_2147, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2149_cast_fp16))[name = 
string("input_107_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2144_to_fp16 = const()[name = string("op_2144_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2144_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_2154_split_sizes_0 = const()[name = string("op_2154_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2154_axis_0 = const()[name = string("op_2154_axis_0"), val = int32(-1)]; + tensor var_2154_cast_fp16_0, tensor var_2154_cast_fp16_1 = split(axis = var_2154_axis_0, split_sizes = var_2154_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2154_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397877632)))]; + tensor h_27_cast_fp16 = mul(x = var_2154_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_2165 = const()[name = string("op_2165"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_2166 = transpose(perm = var_2165, x = h_27_cast_fp16)[name = string("transpose_50")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_2166)[name = string("input_109")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = 
string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = 
string("mlp_out_9")]; + tensor var_2206_axes_0 = const()[name = string("op_2206_axes_0"), val = tensor([2])]; + tensor var_2206 = squeeze(axes = var_2206_axes_0, x = mlp_out_9)[name = string("op_2206")]; + tensor var_2210 = const()[name = string("op_2210"), val = tensor([0, 2, 1])]; + int32 var_2216 = const()[name = string("op_2216"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2210, x = var_2206)[name = string("transpose_49")]; + tensor var_2218 = mul(x = x_75, y = const_37_promoted)[name = string("op_2218")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_2216, interleave = input_113_interleave_0, values = (x_75, var_2218))[name = string("input_113")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_2213_to_fp16 = const()[name = string("op_2213_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2213_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")]; + tensor var_2223_split_sizes_0 = const()[name = string("op_2223_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2223_axis_0 = const()[name = string("op_2223_axis_0"), val = int32(-1)]; + tensor var_2223_0, tensor var_2223_1 = split(axis = var_2223_axis_0, split_sizes = var_2223_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2223")]; + tensor hidden_states_43 = mul(x = var_2223_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 7168])]; + tensor per_layer_slice_9_end_0 = const()[name = 
string("per_layer_slice_9_end_0"), val = tensor([1, 1, 7424])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_2251 = const()[name = string("op_2251"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_2252 = transpose(perm = var_2251, x = hidden_states_45_cast_fp16)[name = string("transpose_48")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_2252)[name = string("input_115")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_2271 = const()[name = string("op_2271"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_2272_cast_fp16 = 
transpose(perm = var_2271, x = per_layer_slice_9_cast_fp16)[name = string("transpose_47")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2272_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397882816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398210560))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_2288_axes_0 = const()[name = string("op_2288_axes_0"), val = tensor([2])]; + tensor var_2288_cast_fp16 = squeeze(axes = var_2288_axes_0, x = gated_29_cast_fp16)[name = string("op_2288_cast_fp16")]; + tensor var_2292 = const()[name = string("op_2292"), val = tensor([0, 2, 1])]; + int32 var_2298 = const()[name = string("op_2298"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = 
string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_2292, x = var_2288_cast_fp16)[name = string("transpose_46")]; + tensor var_2300_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2300_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_2298, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2300_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_2295_to_fp16 = const()[name = string("op_2295_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2295_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_2305_split_sizes_0 = const()[name = string("op_2305_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2305_axis_0 = const()[name = string("op_2305_axis_0"), val = int32(-1)]; + tensor var_2305_cast_fp16_0, tensor var_2305_cast_fp16_1 = split(axis = var_2305_axis_0, split_sizes = var_2305_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2305_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398213184)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_2305_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = 
tensor([0x1.3ap-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_2320 = const()[name = string("op_2320"), val = int32(-1)]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2322_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2322_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_2320, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2322_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2317_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor var_2327_split_sizes_0 = const()[name = string("op_2327_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2327_axis_0 = const()[name = string("op_2327_axis_0"), val = int32(-1)]; + tensor var_2327_cast_fp16_0, tensor var_2327_cast_fp16_1 = split(axis = var_2327_axis_0, split_sizes = var_2327_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2327_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398218368)))]; + tensor h_31_cast_fp16 = mul(x = var_2327_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_2333 = const()[name = string("op_2333"), val = tensor([0, 2, 1])]; + tensor var_2336_axes_0 = const()[name = string("op_2336_axes_0"), val 
= tensor([2])]; + tensor var_2334_cast_fp16 = transpose(perm = var_2333, x = h_31_cast_fp16)[name = string("transpose_45")]; + tensor var_2336_cast_fp16 = expand_dims(axes = var_2336_axes_0, x = var_2334_cast_fp16)[name = string("op_2336_cast_fp16")]; + string var_2352_pad_type_0 = const()[name = string("op_2352_pad_type_0"), val = string("valid")]; + tensor var_2352_strides_0 = const()[name = string("op_2352_strides_0"), val = tensor([1, 1])]; + tensor var_2352_pad_0 = const()[name = string("op_2352_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2352_dilations_0 = const()[name = string("op_2352_dilations_0"), val = tensor([1, 1])]; + int32 var_2352_groups_0 = const()[name = string("op_2352_groups_0"), val = int32(1)]; + tensor var_2352 = conv(dilations = var_2352_dilations_0, groups = var_2352_groups_0, pad = var_2352_pad_0, pad_type = var_2352_pad_type_0, strides = var_2352_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2336_cast_fp16)[name = string("op_2352")]; + tensor var_2357 = const()[name = string("op_2357"), val = tensor([1, 8, 512, 1])]; + tensor var_2358 = reshape(shape = var_2357, x = var_2352)[name = string("op_2358")]; + tensor var_2363 = const()[name = string("op_2363"), val = tensor([0, 1, 3, 2])]; + tensor var_2373 = const()[name = string("op_2373"), val = tensor([1, 8, 512])]; + tensor var_2364 = transpose(perm = var_2363, x = var_2358)[name = string("transpose_44")]; + tensor x_81 = reshape(shape = var_2373, x = var_2364)[name = string("x_81")]; + int32 var_2379 = const()[name = string("op_2379"), val = int32(-1)]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2381 = mul(x = x_81, y = const_41_promoted)[name = string("op_2381")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_2379, interleave = input_125_interleave_0, values = (x_81, var_2381))[name = 
string("input_125")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_2376_to_fp16 = const()[name = string("op_2376_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2376_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")]; + tensor var_2386_split_sizes_0 = const()[name = string("op_2386_split_sizes_0"), val = tensor([512, 512])]; + int32 var_2386_axis_0 = const()[name = string("op_2386_axis_0"), val = int32(-1)]; + tensor var_2386_0, tensor var_2386_1 = split(axis = var_2386_axis_0, split_sizes = var_2386_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2386")]; + tensor var_2388 = mul(x = var_2386_0, y = layers_5_self_attn_q_norm_weight)[name = string("op_2388")]; + tensor var_2393 = const()[name = string("op_2393"), val = tensor([1, 8, 1, 512])]; + tensor q_33 = reshape(shape = var_2393, x = var_2388)[name = string("q_33")]; + tensor var_2395_cast_fp16 = mul(x = q_33, y = cos_f)[name = string("op_2395_cast_fp16")]; + tensor var_2396_split_sizes_0 = const()[name = string("op_2396_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2396_axis_0 = const()[name = string("op_2396_axis_0"), val = int32(-1)]; + tensor var_2396_0, tensor var_2396_1 = split(axis = var_2396_axis_0, split_sizes = var_2396_split_sizes_0, x = q_33)[name = string("op_2396")]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2398 = mul(x = var_2396_1, y = const_42_promoted)[name = string("op_2398")]; + int32 var_2400 = const()[name = string("op_2400"), val = int32(-1)]; + bool var_2401_interleave_0 = const()[name = string("op_2401_interleave_0"), val = bool(false)]; + tensor var_2401 = concat(axis = var_2400, interleave = var_2401_interleave_0, values = (var_2398, var_2396_0))[name = string("op_2401")]; + tensor var_2402_cast_fp16 = mul(x = var_2401, y = sin_f)[name = 
string("op_2402_cast_fp16")]; + tensor q_35_cast_fp16 = add(x = var_2395_cast_fp16, y = var_2402_cast_fp16)[name = string("q_35_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x = kv14_k)[name = string("transpose_43")]; + tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_20, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_42")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_21, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = kv14_v)[name = string("transpose_41")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_22, x = tile_11_cast_fp16)[name = 
string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_40")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_23, x = transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_39")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_35_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_83_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")]; + tensor var_2434 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2434")]; + tensor var_2440 = exp(x = var_2434)[name = string("op_2440")]; + tensor var_2450_axes_0 = const()[name = string("op_2450_axes_0"), val = tensor([-1])]; + bool var_2450_keep_dims_0 = const()[name = string("op_2450_keep_dims_0"), val = bool(true)]; + 
tensor var_2450 = reduce_sum(axes = var_2450_axes_0, keep_dims = var_2450_keep_dims_0, x = var_2440)[name = string("op_2450")]; + tensor var_2456_cast_fp16 = real_div(x = var_2440, y = var_2450)[name = string("op_2456_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_38")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2456_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_2467 = const()[name = string("op_2467"), val = tensor([0, 2, 1, 3])]; + tensor var_2474 = const()[name = string("op_2474"), val = tensor([1, 1, -1])]; + tensor var_2468_cast_fp16 = transpose(perm = var_2467, x = attn_output_31_cast_fp16)[name = string("transpose_37")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_2474, x = var_2468_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_2479 = const()[name = string("op_2479"), val = tensor([0, 2, 1])]; + string var_2495_pad_type_0 = const()[name = string("op_2495_pad_type_0"), val = string("valid")]; + int32 var_2495_groups_0 = const()[name = string("op_2495_groups_0"), val = int32(1)]; + tensor var_2495_strides_0 = const()[name = string("op_2495_strides_0"), val = tensor([1])]; + tensor var_2495_pad_0 = const()[name = string("op_2495_pad_0"), val = tensor([0, 0])]; + tensor var_2495_dilations_0 = const()[name = string("op_2495_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398223552))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(403466496))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2480_cast_fp16 = transpose(perm = var_2479, x = attn_output_33_cast_fp16)[name = string("transpose_36")]; + tensor var_2495_cast_fp16 = conv(dilations = var_2495_dilations_0, groups = var_2495_groups_0, pad = var_2495_pad_0, pad_type = var_2495_pad_type_0, strides = var_2495_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2480_cast_fp16)[name = string("op_2495_cast_fp16")]; + tensor var_2499 = const()[name = string("op_2499"), val = tensor([0, 2, 1])]; + int32 var_2505 = const()[name = string("op_2505"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_87_cast_fp16 = transpose(perm = var_2499, x = var_2495_cast_fp16)[name = string("transpose_35")]; + tensor var_2507_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2507_cast_fp16")]; + bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129_cast_fp16 = concat(axis = var_2505, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2507_cast_fp16))[name = string("input_129_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_2502_to_fp16 = const()[name = string("op_2502_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2502_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_2512_split_sizes_0 = const()[name = string("op_2512_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2512_axis_0 = const()[name = string("op_2512_axis_0"), val = int32(-1)]; + tensor var_2512_cast_fp16_0, tensor var_2512_cast_fp16_1 = split(axis = var_2512_axis_0, split_sizes = var_2512_split_sizes_0, 
x = normed_129_cast_fp16)[name = string("op_2512_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403469120)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_2512_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_2521 = const()[name = string("op_2521"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2523_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2523_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_2521, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2523_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_2518_to_fp16 = const()[name = string("op_2518_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2518_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_2528_split_sizes_0 = const()[name = string("op_2528_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2528_axis_0 = const()[name = string("op_2528_axis_0"), val = int32(-1)]; + tensor var_2528_cast_fp16_0, tensor var_2528_cast_fp16_1 = split(axis = var_2528_axis_0, split_sizes = var_2528_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2528_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = 
const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403474304)))]; + tensor h_33_cast_fp16 = mul(x = var_2528_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_2539 = const()[name = string("op_2539"), val = tensor([0, 2, 1])]; + tensor input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor([2])]; + tensor var_2540 = transpose(perm = var_2539, x = h_33_cast_fp16)[name = string("transpose_34")]; + tensor input_133 = expand_dims(axes = input_133_axes_0, x = var_2540)[name = string("input_133")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, 
pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")]; + tensor var_2580_axes_0 = const()[name = string("op_2580_axes_0"), val = tensor([2])]; + tensor var_2580 = squeeze(axes = var_2580_axes_0, x = mlp_out_11)[name = string("op_2580")]; + tensor var_2584 = const()[name = string("op_2584"), val = tensor([0, 2, 1])]; + int32 var_2590 = const()[name = string("op_2590"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_91 = transpose(perm = var_2584, x = var_2580)[name = string("transpose_33")]; + tensor var_2592 = mul(x = x_91, y = const_45_promoted)[name = string("op_2592")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137 = concat(axis = var_2590, interleave = input_137_interleave_0, values = (x_91, 
var_2592))[name = string("input_137")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_2587_to_fp16 = const()[name = string("op_2587_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2587_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")]; + tensor var_2597_split_sizes_0 = const()[name = string("op_2597_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2597_axis_0 = const()[name = string("op_2597_axis_0"), val = int32(-1)]; + tensor var_2597_0, tensor var_2597_1 = split(axis = var_2597_axis_0, split_sizes = var_2597_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2597")]; + tensor hidden_states_53 = mul(x = var_2597_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 7424])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 7680])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_2625 = const()[name = string("op_2625"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_2626 = transpose(perm = var_2625, x = hidden_states_55_cast_fp16)[name = string("transpose_32")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_2626)[name = string("input_139")]; + string 
gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_2645 = const()[name = string("op_2645"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_2646_cast_fp16 = transpose(perm = var_2645, x = per_layer_slice_11_cast_fp16)[name = string("transpose_31")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2646_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = 
const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403479488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403807232))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_2662_axes_0 = const()[name = string("op_2662_axes_0"), val = tensor([2])]; + tensor var_2662_cast_fp16 = squeeze(axes = var_2662_axes_0, x = gated_35_cast_fp16)[name = string("op_2662_cast_fp16")]; + tensor var_2666 = const()[name = string("op_2666"), val = tensor([0, 2, 1])]; + int32 var_2672 = const()[name = string("op_2672"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_93_cast_fp16 = transpose(perm = var_2666, x = var_2662_cast_fp16)[name = string("transpose_30")]; + tensor var_2674_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2674_cast_fp16")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143_cast_fp16 = concat(axis = var_2672, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2674_cast_fp16))[name = string("input_143_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_2669_to_fp16 = const()[name = string("op_2669_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = 
normed_141_axes_0, epsilon = var_2669_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_2679_split_sizes_0 = const()[name = string("op_2679_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2679_axis_0 = const()[name = string("op_2679_axis_0"), val = int32(-1)]; + tensor var_2679_cast_fp16_0, tensor var_2679_cast_fp16_1 = split(axis = var_2679_axis_0, split_sizes = var_2679_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2679_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403809856)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_2679_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.aep-2])]; + tensor x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")]; + int32 var_2694 = const()[name = string("op_2694"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2696_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2696_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_2694, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2696_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = 
tensor([-1])]; + fp16 var_2691_to_fp16 = const()[name = string("op_2691_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2691_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_2701_split_sizes_0 = const()[name = string("op_2701_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2701_axis_0 = const()[name = string("op_2701_axis_0"), val = int32(-1)]; + tensor var_2701_cast_fp16_0, tensor var_2701_cast_fp16_1 = split(axis = var_2701_axis_0, split_sizes = var_2701_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2701_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403815040)))]; + tensor h_37_cast_fp16 = mul(x = var_2701_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_2707 = const()[name = string("op_2707"), val = tensor([0, 2, 1])]; + tensor var_2710_axes_0 = const()[name = string("op_2710_axes_0"), val = tensor([2])]; + tensor var_2708_cast_fp16 = transpose(perm = var_2707, x = h_37_cast_fp16)[name = string("transpose_29")]; + tensor var_2710_cast_fp16 = expand_dims(axes = var_2710_axes_0, x = var_2708_cast_fp16)[name = string("op_2710_cast_fp16")]; + string var_2726_pad_type_0 = const()[name = string("op_2726_pad_type_0"), val = string("valid")]; + tensor var_2726_strides_0 = const()[name = string("op_2726_strides_0"), val = tensor([1, 1])]; + tensor var_2726_pad_0 = const()[name = string("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_dilations_0 = const()[name = string("op_2726_dilations_0"), val = tensor([1, 1])]; + int32 var_2726_groups_0 = const()[name = string("op_2726_groups_0"), val = int32(1)]; + tensor var_2726 = conv(dilations = var_2726_dilations_0, groups = 
var_2726_groups_0, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2726_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2710_cast_fp16)[name = string("op_2726")]; + tensor var_2731 = const()[name = string("op_2731"), val = tensor([1, 8, 256, 1])]; + tensor var_2732 = reshape(shape = var_2731, x = var_2726)[name = string("op_2732")]; + tensor var_2737 = const()[name = string("op_2737"), val = tensor([0, 1, 3, 2])]; + tensor var_2747 = const()[name = string("op_2747"), val = tensor([1, 8, 256])]; + tensor var_2738 = transpose(perm = var_2737, x = var_2732)[name = string("transpose_28")]; + tensor x_97 = reshape(shape = var_2747, x = var_2738)[name = string("x_97")]; + int32 var_2753 = const()[name = string("op_2753"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_2755 = mul(x = x_97, y = const_49_promoted)[name = string("op_2755")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_2753, interleave = input_149_interleave_0, values = (x_97, var_2755))[name = string("input_149")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_2750_to_fp16 = const()[name = string("op_2750_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2750_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")]; + tensor var_2760_split_sizes_0 = const()[name = string("op_2760_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2760_axis_0 = const()[name = string("op_2760_axis_0"), val = int32(-1)]; + tensor var_2760_0, tensor var_2760_1 = split(axis = var_2760_axis_0, split_sizes = var_2760_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2760")]; + tensor var_2762 = mul(x = var_2760_0, y = layers_0_self_attn_q_norm_weight)[name = 
string("op_2762")]; + tensor var_2767 = const()[name = string("op_2767"), val = tensor([1, 8, 1, 256])]; + tensor q_39 = reshape(shape = var_2767, x = var_2762)[name = string("q_39")]; + tensor var_2769_cast_fp16 = mul(x = q_39, y = cos_s)[name = string("op_2769_cast_fp16")]; + tensor var_2770_split_sizes_0 = const()[name = string("op_2770_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2770_axis_0 = const()[name = string("op_2770_axis_0"), val = int32(-1)]; + tensor var_2770_0, tensor var_2770_1 = split(axis = var_2770_axis_0, split_sizes = var_2770_split_sizes_0, x = q_39)[name = string("op_2770")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_2772 = mul(x = var_2770_1, y = const_50_promoted)[name = string("op_2772")]; + int32 var_2774 = const()[name = string("op_2774"), val = int32(-1)]; + bool var_2775_interleave_0 = const()[name = string("op_2775_interleave_0"), val = bool(false)]; + tensor var_2775 = concat(axis = var_2774, interleave = var_2775_interleave_0, values = (var_2772, var_2770_0))[name = string("op_2775")]; + tensor var_2776_cast_fp16 = mul(x = var_2775, y = sin_s)[name = string("op_2776_cast_fp16")]; + tensor q_41_cast_fp16 = add(x = var_2769_cast_fp16, y = var_2776_cast_fp16)[name = string("q_41_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_41_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool 
reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = string("reduce_max_6")]; + tensor var_2808 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2808")]; + tensor var_2814 = exp(x = var_2808)[name = string("op_2814")]; + tensor var_2824_axes_0 = const()[name = string("op_2824_axes_0"), val = tensor([-1])]; + bool var_2824_keep_dims_0 = const()[name = string("op_2824_keep_dims_0"), val = bool(true)]; + tensor var_2824 = reduce_sum(axes = var_2824_axes_0, keep_dims = var_2824_keep_dims_0, x = var_2814)[name = string("op_2824")]; + tensor var_2830_cast_fp16 = real_div(x = var_2814, y = var_2824)[name = string("op_2830_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_2830_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_2841 = const()[name = string("op_2841"), val = tensor([0, 2, 1, 3])]; + tensor var_2848 = const()[name = string("op_2848"), val = tensor([1, 1, -1])]; + tensor var_2842_cast_fp16 = transpose(perm = var_2841, x = attn_output_37_cast_fp16)[name = string("transpose_27")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_2848, x = var_2842_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_2853 = const()[name = string("op_2853"), val = tensor([0, 2, 1])]; + string var_2869_pad_type_0 = const()[name = string("op_2869_pad_type_0"), val = string("valid")]; + int32 var_2869_groups_0 = const()[name = string("op_2869_groups_0"), val = int32(1)]; + tensor var_2869_strides_0 = const()[name 
= string("op_2869_strides_0"), val = tensor([1])]; + tensor var_2869_pad_0 = const()[name = string("op_2869_pad_0"), val = tensor([0, 0])]; + tensor var_2869_dilations_0 = const()[name = string("op_2869_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403820224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406441728))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2854_cast_fp16 = transpose(perm = var_2853, x = attn_output_39_cast_fp16)[name = string("transpose_26")]; + tensor var_2869_cast_fp16 = conv(dilations = var_2869_dilations_0, groups = var_2869_groups_0, pad = var_2869_pad_0, pad_type = var_2869_pad_type_0, strides = var_2869_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2854_cast_fp16)[name = string("op_2869_cast_fp16")]; + tensor var_2873 = const()[name = string("op_2873"), val = tensor([0, 2, 1])]; + int32 var_2879 = const()[name = string("op_2879"), val = int32(-1)]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_103_cast_fp16 = transpose(perm = var_2873, x = var_2869_cast_fp16)[name = string("transpose_25")]; + tensor var_2881_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2881_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_2879, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2881_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = 
layer_norm(axes = normed_153_axes_0, epsilon = var_2876_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_2886_split_sizes_0 = const()[name = string("op_2886_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2886_axis_0 = const()[name = string("op_2886_axis_0"), val = int32(-1)]; + tensor var_2886_cast_fp16_0, tensor var_2886_cast_fp16_1 = split(axis = var_2886_axis_0, split_sizes = var_2886_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2886_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406444352)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_2886_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")]; + int32 var_2895 = const()[name = string("op_2895"), val = int32(-1)]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2897_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2897_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_2895, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2897_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_2892_to_fp16 = const()[name = string("op_2892_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2892_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")]; + 
tensor var_2902_split_sizes_0 = const()[name = string("op_2902_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2902_axis_0 = const()[name = string("op_2902_axis_0"), val = int32(-1)]; + tensor var_2902_cast_fp16_0, tensor var_2902_cast_fp16_1 = split(axis = var_2902_axis_0, split_sizes = var_2902_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_2902_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406449536)))]; + tensor h_39_cast_fp16 = mul(x = var_2902_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_2913 = const()[name = string("op_2913"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_2914 = transpose(perm = var_2913, x = h_39_cast_fp16)[name = string("transpose_24")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_2914)[name = string("input_157")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + 
tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")]; + tensor var_2954_axes_0 = const()[name = string("op_2954_axes_0"), val = tensor([2])]; + tensor var_2954 = squeeze(axes = var_2954_axes_0, x = mlp_out_13)[name = string("op_2954")]; + tensor var_2958 = const()[name = string("op_2958"), val = tensor([0, 2, 1])]; + int32 var_2964 = const()[name = string("op_2964"), val = int32(-1)]; + fp16 
const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor x_107 = transpose(perm = var_2958, x = var_2954)[name = string("transpose_23")]; + tensor var_2966 = mul(x = x_107, y = const_53_promoted)[name = string("op_2966")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_2964, interleave = input_161_interleave_0, values = (x_107, var_2966))[name = string("input_161")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_2961_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")]; + tensor var_2971_split_sizes_0 = const()[name = string("op_2971_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2971_axis_0 = const()[name = string("op_2971_axis_0"), val = int32(-1)]; + tensor var_2971_0, tensor var_2971_1 = split(axis = var_2971_axis_0, split_sizes = var_2971_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_2971")]; + tensor hidden_states_63 = mul(x = var_2971_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 7680])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 7936])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = 
per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_2999 = const()[name = string("op_2999"), val = tensor([0, 2, 1])]; + tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; + tensor var_3000 = transpose(perm = var_2999, x = hidden_states_65_cast_fp16)[name = string("transpose_22")]; + tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_3000)[name = string("input_163")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_3019 = const()[name = string("op_3019"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_3020_cast_fp16 = transpose(perm = var_3019, x = per_layer_slice_13_cast_fp16)[name = string("transpose_21")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3020_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = 
string("input_165_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406454720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406782464))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_3036_axes_0 = const()[name = string("op_3036_axes_0"), val = tensor([2])]; + tensor var_3036_cast_fp16 = squeeze(axes = var_3036_axes_0, x = gated_41_cast_fp16)[name = string("op_3036_cast_fp16")]; + tensor var_3040 = const()[name = string("op_3040"), val = tensor([0, 2, 1])]; + int32 var_3046 = const()[name = string("op_3046"), val = int32(-1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_109_cast_fp16 = transpose(perm = var_3040, x = var_3036_cast_fp16)[name = string("transpose_20")]; + tensor var_3048_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3048_cast_fp16")]; + bool input_167_interleave_0 = const()[name = 
string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_3046, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3048_cast_fp16))[name = string("input_167_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_3043_to_fp16 = const()[name = string("op_3043_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3043_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_3053_split_sizes_0 = const()[name = string("op_3053_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3053_axis_0 = const()[name = string("op_3053_axis_0"), val = int32(-1)]; + tensor var_3053_cast_fp16_0, tensor var_3053_cast_fp16_1 = split(axis = var_3053_axis_0, split_sizes = var_3053_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3053_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406785088)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_3053_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")]; + int32 var_3068 = const()[name = string("op_3068"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor 
var_3070_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3070_cast_fp16")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169_cast_fp16 = concat(axis = var_3068, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3070_cast_fp16))[name = string("input_169_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_3065_to_fp16 = const()[name = string("op_3065_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3065_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_3075_split_sizes_0 = const()[name = string("op_3075_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3075_axis_0 = const()[name = string("op_3075_axis_0"), val = int32(-1)]; + tensor var_3075_cast_fp16_0, tensor var_3075_cast_fp16_1 = split(axis = var_3075_axis_0, split_sizes = var_3075_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3075_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406790272)))]; + tensor h_43_cast_fp16 = mul(x = var_3075_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_3081 = const()[name = string("op_3081"), val = tensor([0, 2, 1])]; + tensor var_3084_axes_0 = const()[name = string("op_3084_axes_0"), val = tensor([2])]; + tensor var_3082_cast_fp16 = transpose(perm = var_3081, x = h_43_cast_fp16)[name = string("transpose_19")]; + tensor var_3084_cast_fp16 = expand_dims(axes = var_3084_axes_0, x = var_3082_cast_fp16)[name = string("op_3084_cast_fp16")]; + string var_3100_pad_type_0 = const()[name = string("op_3100_pad_type_0"), val = 
string("valid")]; + tensor var_3100_strides_0 = const()[name = string("op_3100_strides_0"), val = tensor([1, 1])]; + tensor var_3100_pad_0 = const()[name = string("op_3100_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3100_dilations_0 = const()[name = string("op_3100_dilations_0"), val = tensor([1, 1])]; + int32 var_3100_groups_0 = const()[name = string("op_3100_groups_0"), val = int32(1)]; + tensor var_3100 = conv(dilations = var_3100_dilations_0, groups = var_3100_groups_0, pad = var_3100_pad_0, pad_type = var_3100_pad_type_0, strides = var_3100_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3084_cast_fp16)[name = string("op_3100")]; + tensor var_3105 = const()[name = string("op_3105"), val = tensor([1, 8, 256, 1])]; + tensor var_3106 = reshape(shape = var_3105, x = var_3100)[name = string("op_3106")]; + tensor var_3111 = const()[name = string("op_3111"), val = tensor([0, 1, 3, 2])]; + tensor var_3121 = const()[name = string("op_3121"), val = tensor([1, 8, 256])]; + tensor var_3112 = transpose(perm = var_3111, x = var_3106)[name = string("transpose_18")]; + tensor x_113 = reshape(shape = var_3121, x = var_3112)[name = string("x_113")]; + int32 var_3127 = const()[name = string("op_3127"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor var_3129 = mul(x = x_113, y = const_57_promoted)[name = string("op_3129")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_3127, interleave = input_173_interleave_0, values = (x_113, var_3129))[name = string("input_173")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_3124_to_fp16 = const()[name = string("op_3124_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3124_to_fp16, x = input_173)[name = 
string("normed_173_cast_fp16")]; + tensor var_3134_split_sizes_0 = const()[name = string("op_3134_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3134_axis_0 = const()[name = string("op_3134_axis_0"), val = int32(-1)]; + tensor var_3134_0, tensor var_3134_1 = split(axis = var_3134_axis_0, split_sizes = var_3134_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3134")]; + tensor var_3136 = mul(x = var_3134_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3136")]; + tensor var_3141 = const()[name = string("op_3141"), val = tensor([1, 8, 1, 256])]; + tensor q_45 = reshape(shape = var_3141, x = var_3136)[name = string("q_45")]; + tensor var_3143_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_3143_cast_fp16")]; + tensor var_3144_split_sizes_0 = const()[name = string("op_3144_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3144_axis_0 = const()[name = string("op_3144_axis_0"), val = int32(-1)]; + tensor var_3144_0, tensor var_3144_1 = split(axis = var_3144_axis_0, split_sizes = var_3144_split_sizes_0, x = q_45)[name = string("op_3144")]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor var_3146 = mul(x = var_3144_1, y = const_58_promoted)[name = string("op_3146")]; + int32 var_3148 = const()[name = string("op_3148"), val = int32(-1)]; + bool var_3149_interleave_0 = const()[name = string("op_3149_interleave_0"), val = bool(false)]; + tensor var_3149 = concat(axis = var_3148, interleave = var_3149_interleave_0, values = (var_3146, var_3144_0))[name = string("op_3149")]; + tensor var_3150_cast_fp16 = mul(x = var_3149, y = sin_s)[name = string("op_3150_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_3143_cast_fp16, y = var_3150_cast_fp16)[name = string("q_47_cast_fp16")]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = 
string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_47_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")]; + tensor var_3182 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3182")]; + tensor var_3188 = exp(x = var_3182)[name = string("op_3188")]; + tensor var_3198_axes_0 = const()[name = string("op_3198_axes_0"), val = tensor([-1])]; + bool var_3198_keep_dims_0 = const()[name = string("op_3198_keep_dims_0"), val = bool(true)]; + tensor var_3198 = reduce_sum(axes = var_3198_axes_0, keep_dims = var_3198_keep_dims_0, x = var_3188)[name = string("op_3198")]; + tensor var_3204_cast_fp16 = real_div(x = var_3188, y = var_3198)[name = string("op_3204_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3204_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_3215 = const()[name = string("op_3215"), val = tensor([0, 2, 1, 3])]; + tensor var_3222 = const()[name = string("op_3222"), val = tensor([1, 1, -1])]; + tensor var_3216_cast_fp16 = 
transpose(perm = var_3215, x = attn_output_43_cast_fp16)[name = string("transpose_17")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_3222, x = var_3216_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3227 = const()[name = string("op_3227"), val = tensor([0, 2, 1])]; + string var_3243_pad_type_0 = const()[name = string("op_3243_pad_type_0"), val = string("valid")]; + int32 var_3243_groups_0 = const()[name = string("op_3243_groups_0"), val = int32(1)]; + tensor var_3243_strides_0 = const()[name = string("op_3243_strides_0"), val = tensor([1])]; + tensor var_3243_pad_0 = const()[name = string("op_3243_pad_0"), val = tensor([0, 0])]; + tensor var_3243_dilations_0 = const()[name = string("op_3243_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406795456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409416960))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3228_cast_fp16 = transpose(perm = var_3227, x = attn_output_45_cast_fp16)[name = string("transpose_16")]; + tensor var_3243_cast_fp16 = conv(dilations = var_3243_dilations_0, groups = var_3243_groups_0, pad = var_3243_pad_0, pad_type = var_3243_pad_type_0, strides = var_3243_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3228_cast_fp16)[name = string("op_3243_cast_fp16")]; + tensor var_3247 = const()[name = string("op_3247"), val = tensor([0, 2, 1])]; + int32 var_3253 = const()[name = string("op_3253"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_119_cast_fp16 = transpose(perm = var_3247, x = var_3243_cast_fp16)[name = string("transpose_15")]; + tensor var_3255_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name 
= string("op_3255_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_3253, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3255_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3250_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor var_3260_split_sizes_0 = const()[name = string("op_3260_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3260_axis_0 = const()[name = string("op_3260_axis_0"), val = int32(-1)]; + tensor var_3260_cast_fp16_0, tensor var_3260_cast_fp16_1 = split(axis = var_3260_axis_0, split_sizes = var_3260_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3260_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409419584)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_3260_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")]; + int32 var_3269 = const()[name = string("op_3269"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3271_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3271_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + 
tensor input_179_cast_fp16 = concat(axis = var_3269, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3271_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_3266_to_fp16 = const()[name = string("op_3266_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3266_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_3276_split_sizes_0 = const()[name = string("op_3276_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3276_axis_0 = const()[name = string("op_3276_axis_0"), val = int32(-1)]; + tensor var_3276_cast_fp16_0, tensor var_3276_cast_fp16_1 = split(axis = var_3276_axis_0, split_sizes = var_3276_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3276_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409424768)))]; + tensor h_45_cast_fp16 = mul(x = var_3276_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_3287 = const()[name = string("op_3287"), val = tensor([0, 2, 1])]; + tensor input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor([2])]; + tensor var_3288 = transpose(perm = var_3287, x = h_45_cast_fp16)[name = string("transpose_14")]; + tensor input_181 = expand_dims(axes = input_181_axes_0, x = var_3288)[name = string("input_181")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = 
mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")]; + tensor var_3328_axes_0 = const()[name = string("op_3328_axes_0"), val = tensor([2])]; + tensor var_3328 = squeeze(axes = var_3328_axes_0, x = mlp_out_15)[name = string("op_3328")]; + tensor var_3332 = const()[name = string("op_3332"), val = tensor([0, 2, 1])]; + int32 var_3338 = const()[name = string("op_3338"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor x_123 = transpose(perm = var_3332, x = var_3328)[name = string("transpose_13")]; + tensor var_3340 = mul(x = x_123, y = const_61_promoted)[name = string("op_3340")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_3338, interleave = input_185_interleave_0, values = (x_123, var_3340))[name = string("input_185")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_3335_to_fp16 = const()[name = string("op_3335_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3335_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")]; + tensor var_3345_split_sizes_0 = const()[name = string("op_3345_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3345_axis_0 = const()[name = string("op_3345_axis_0"), val = int32(-1)]; + tensor var_3345_0, tensor var_3345_1 = split(axis = var_3345_axis_0, split_sizes = var_3345_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3345")]; + tensor hidden_states_73 = mul(x = var_3345_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = 
string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 7936])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 8192])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_3373 = const()[name = string("op_3373"), val = tensor([0, 2, 1])]; + tensor input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor([2])]; + tensor var_3374 = transpose(perm = var_3373, x = hidden_states_75_cast_fp16)[name = string("transpose_12")]; + tensor input_187 = expand_dims(axes = input_187_axes_0, x = var_3374)[name = string("input_187")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor 
var_3393 = const()[name = string("op_3393"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_3394_cast_fp16 = transpose(perm = var_3393, x = per_layer_slice_15_cast_fp16)[name = string("transpose_11")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3394_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409429952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409757696))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_3410_axes_0 = const()[name = string("op_3410_axes_0"), val = tensor([2])]; + tensor var_3410_cast_fp16 = squeeze(axes = var_3410_axes_0, x = gated_47_cast_fp16)[name = 
string("op_3410_cast_fp16")]; + tensor var_3414 = const()[name = string("op_3414"), val = tensor([0, 2, 1])]; + int32 var_3420 = const()[name = string("op_3420"), val = int32(-1)]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_125_cast_fp16 = transpose(perm = var_3414, x = var_3410_cast_fp16)[name = string("transpose_10")]; + tensor var_3422_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3422_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_3420, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3422_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_3417_to_fp16 = const()[name = string("op_3417_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3417_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_3427_split_sizes_0 = const()[name = string("op_3427_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3427_axis_0 = const()[name = string("op_3427_axis_0"), val = int32(-1)]; + tensor var_3427_cast_fp16_0, tensor var_3427_cast_fp16_1 = split(axis = var_3427_axis_0, split_sizes = var_3427_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3427_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409760320)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_3427_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor 
hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor([0x1.a2p-1])]; + tensor x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")]; + int32 var_3442 = const()[name = string("op_3442"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3444_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3444_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_3442, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3444_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_3439_to_fp16 = const()[name = string("op_3439_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3439_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_3449_split_sizes_0 = const()[name = string("op_3449_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3449_axis_0 = const()[name = string("op_3449_axis_0"), val = int32(-1)]; + tensor var_3449_cast_fp16_0, tensor var_3449_cast_fp16_1 = split(axis = var_3449_axis_0, split_sizes = var_3449_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3449_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409765504)))]; + tensor h_49_cast_fp16 = mul(x = 
var_3449_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_3455 = const()[name = string("op_3455"), val = tensor([0, 2, 1])]; + tensor var_3458_axes_0 = const()[name = string("op_3458_axes_0"), val = tensor([2])]; + tensor var_3456_cast_fp16 = transpose(perm = var_3455, x = h_49_cast_fp16)[name = string("transpose_9")]; + tensor var_3458_cast_fp16 = expand_dims(axes = var_3458_axes_0, x = var_3456_cast_fp16)[name = string("op_3458_cast_fp16")]; + string var_3474_pad_type_0 = const()[name = string("op_3474_pad_type_0"), val = string("valid")]; + tensor var_3474_strides_0 = const()[name = string("op_3474_strides_0"), val = tensor([1, 1])]; + tensor var_3474_pad_0 = const()[name = string("op_3474_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3474_dilations_0 = const()[name = string("op_3474_dilations_0"), val = tensor([1, 1])]; + int32 var_3474_groups_0 = const()[name = string("op_3474_groups_0"), val = int32(1)]; + tensor var_3474 = conv(dilations = var_3474_dilations_0, groups = var_3474_groups_0, pad = var_3474_pad_0, pad_type = var_3474_pad_type_0, strides = var_3474_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3458_cast_fp16)[name = string("op_3474")]; + tensor var_3479 = const()[name = string("op_3479"), val = tensor([1, 8, 256, 1])]; + tensor var_3480 = reshape(shape = var_3479, x = var_3474)[name = string("op_3480")]; + tensor var_3485 = const()[name = string("op_3485"), val = tensor([0, 1, 3, 2])]; + tensor var_3495 = const()[name = string("op_3495"), val = tensor([1, 8, 256])]; + tensor var_3486 = transpose(perm = var_3485, x = var_3480)[name = string("transpose_8")]; + tensor x_129 = reshape(shape = var_3495, x = var_3486)[name = string("x_129")]; + int32 var_3501 = const()[name = string("op_3501"), val = int32(-1)]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_3503 = mul(x = x_129, y = 
const_65_promoted)[name = string("op_3503")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + tensor input_197 = concat(axis = var_3501, interleave = input_197_interleave_0, values = (x_129, var_3503))[name = string("input_197")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_3498_to_fp16 = const()[name = string("op_3498_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3498_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")]; + tensor var_3508_split_sizes_0 = const()[name = string("op_3508_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3508_axis_0 = const()[name = string("op_3508_axis_0"), val = int32(-1)]; + tensor var_3508_0, tensor var_3508_1 = split(axis = var_3508_axis_0, split_sizes = var_3508_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3508")]; + tensor var_3510 = mul(x = var_3508_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3510")]; + tensor var_3515 = const()[name = string("op_3515"), val = tensor([1, 8, 1, 256])]; + tensor q_51 = reshape(shape = var_3515, x = var_3510)[name = string("q_51")]; + tensor var_3517_cast_fp16 = mul(x = q_51, y = cos_s)[name = string("op_3517_cast_fp16")]; + tensor var_3518_split_sizes_0 = const()[name = string("op_3518_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3518_axis_0 = const()[name = string("op_3518_axis_0"), val = int32(-1)]; + tensor var_3518_0, tensor var_3518_1 = split(axis = var_3518_axis_0, split_sizes = var_3518_split_sizes_0, x = q_51)[name = string("op_3518")]; + fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)]; + tensor var_3520 = mul(x = var_3518_1, y = const_66_promoted)[name = string("op_3520")]; + int32 var_3522 = const()[name = string("op_3522"), val = int32(-1)]; + bool var_3523_interleave_0 = const()[name = 
string("op_3523_interleave_0"), val = bool(false)]; + tensor var_3523 = concat(axis = var_3522, interleave = var_3523_interleave_0, values = (var_3520, var_3518_0))[name = string("op_3523")]; + tensor var_3524_cast_fp16 = mul(x = var_3523, y = sin_s)[name = string("op_3524_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_3517_cast_fp16, y = var_3524_cast_fp16)[name = string("q_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_131_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")]; + tensor var_3556 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3556")]; + tensor var_3562 = exp(x = var_3556)[name = string("op_3562")]; + tensor var_3572_axes_0 = const()[name = string("op_3572_axes_0"), val = tensor([-1])]; + bool var_3572_keep_dims_0 = const()[name = string("op_3572_keep_dims_0"), val = bool(true)]; + tensor var_3572 = reduce_sum(axes = var_3572_axes_0, keep_dims = var_3572_keep_dims_0, x = var_3562)[name = string("op_3572")]; + tensor var_3578_cast_fp16 = real_div(x = var_3562, y = var_3572)[name = string("op_3578_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = 
bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_3578_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_3589 = const()[name = string("op_3589"), val = tensor([0, 2, 1, 3])]; + tensor var_3596 = const()[name = string("op_3596"), val = tensor([1, 1, -1])]; + tensor var_3590_cast_fp16 = transpose(perm = var_3589, x = attn_output_49_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_3596, x = var_3590_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_3601 = const()[name = string("op_3601"), val = tensor([0, 2, 1])]; + string var_3617_pad_type_0 = const()[name = string("op_3617_pad_type_0"), val = string("valid")]; + int32 var_3617_groups_0 = const()[name = string("op_3617_groups_0"), val = int32(1)]; + tensor var_3617_strides_0 = const()[name = string("op_3617_strides_0"), val = tensor([1])]; + tensor var_3617_pad_0 = const()[name = string("op_3617_pad_0"), val = tensor([0, 0])]; + tensor var_3617_dilations_0 = const()[name = string("op_3617_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409770688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412392192))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3602_cast_fp16 = transpose(perm = var_3601, x = attn_output_51_cast_fp16)[name = string("transpose_6")]; + tensor var_3617_cast_fp16 = conv(dilations = var_3617_dilations_0, groups = var_3617_groups_0, pad = var_3617_pad_0, pad_type = var_3617_pad_type_0, strides = var_3617_strides_0, weight = 
squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3602_cast_fp16)[name = string("op_3617_cast_fp16")]; + tensor var_3621 = const()[name = string("op_3621"), val = tensor([0, 2, 1])]; + int32 var_3627 = const()[name = string("op_3627"), val = int32(-1)]; + fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_135_cast_fp16 = transpose(perm = var_3621, x = var_3617_cast_fp16)[name = string("transpose_5")]; + tensor var_3629_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3629_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_3627, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3629_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_3624_to_fp16 = const()[name = string("op_3624_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3624_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_3634_split_sizes_0 = const()[name = string("op_3634_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3634_axis_0 = const()[name = string("op_3634_axis_0"), val = int32(-1)]; + tensor var_3634_cast_fp16_0, tensor var_3634_cast_fp16_1 = split(axis = var_3634_axis_0, split_sizes = var_3634_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3634_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412394816)))]; + tensor attn_output_cast_fp16 = mul(x = var_3634_cast_fp16_0, y = 
layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_cast_fp16)[name = string("x_137_cast_fp16")]; + int32 var_3643 = const()[name = string("op_3643"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3645_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3645_cast_fp16")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203_cast_fp16 = concat(axis = var_3643, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3645_cast_fp16))[name = string("input_203_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_3640_to_fp16 = const()[name = string("op_3640_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3640_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor var_3650_split_sizes_0 = const()[name = string("op_3650_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3650_axis_0 = const()[name = string("op_3650_axis_0"), val = int32(-1)]; + tensor var_3650_cast_fp16_0, tensor var_3650_cast_fp16_1 = split(axis = var_3650_axis_0, split_sizes = var_3650_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3650_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412400000)))]; + tensor h_51_cast_fp16 = mul(x = var_3650_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_3661 = const()[name = string("op_3661"), 
val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_3662 = transpose(perm = var_3661, x = h_51_cast_fp16)[name = string("transpose_4")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3662)[name = string("input_205")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_33)[name = string("gate")]; + tensor input_207 = mul(x = gate, y = up)[name = string("input_207")]; + string mlp_out_pad_type_0 = const()[name = 
string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out")]; + tensor var_3702_axes_0 = const()[name = string("op_3702_axes_0"), val = tensor([2])]; + tensor var_3702 = squeeze(axes = var_3702_axes_0, x = mlp_out)[name = string("op_3702")]; + tensor var_3706 = const()[name = string("op_3706"), val = tensor([0, 2, 1])]; + int32 var_3712 = const()[name = string("op_3712"), val = int32(-1)]; + fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)]; + tensor x_139 = transpose(perm = var_3706, x = var_3702)[name = string("transpose_3")]; + tensor var_3714 = mul(x = x_139, y = const_69_promoted)[name = string("op_3714")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209 = concat(axis = var_3712, interleave = input_209_interleave_0, values = (x_139, var_3714))[name = string("input_209")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_3709_to_fp16 = const()[name = string("op_3709_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3709_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")]; + tensor var_3719_split_sizes_0 = const()[name = string("op_3719_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3719_axis_0 
= const()[name = string("op_3719_axis_0"), val = int32(-1)]; + tensor var_3719_0, tensor var_3719_1 = split(axis = var_3719_axis_0, split_sizes = var_3719_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3719")]; + tensor hidden_states_83 = mul(x = var_3719_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 8192])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 8448])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_3747 = const()[name = string("op_3747"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_3748 = transpose(perm = var_3747, x = hidden_states_85_cast_fp16)[name = string("transpose_2")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_3748)[name = string("input_211")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = 
gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_3767 = const()[name = string("op_3767"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_3768_cast_fp16 = transpose(perm = var_3767, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_3768_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_cast_fp16)[name = string("input_213_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412405184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412732928))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = 
gated_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_3784_axes_0 = const()[name = string("op_3784_axes_0"), val = tensor([2])]; + tensor var_3784_cast_fp16 = squeeze(axes = var_3784_axes_0, x = gated_cast_fp16)[name = string("op_3784_cast_fp16")]; + tensor var_3788 = const()[name = string("op_3788"), val = tensor([0, 2, 1])]; + int32 var_3794 = const()[name = string("op_3794"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_3788, x = var_3784_cast_fp16)[name = string("transpose_0")]; + tensor var_3796_cast_fp16 = mul(x = x_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3796_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_3794, interleave = input_interleave_0, values = (x_cast_fp16, var_3796_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_3791_to_fp16 = const()[name = string("op_3791_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3791_to_fp16, x = input_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_3801_split_sizes_0 = const()[name = string("op_3801_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3801_axis_0 = const()[name = string("op_3801_axis_0"), val = int32(-1)]; + tensor var_3801_cast_fp16_0, tensor var_3801_cast_fp16_1 = split(axis = var_3801_axis_0, split_sizes = var_3801_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3801_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412735552)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_3801_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor([0x1.b4p-1])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_71_promoted_to_fp16)[name = string("op_3811_cast_fp16")]; + tensor update_mask_tmp = identity(x = update_mask)[name = string("update_mask_tmp")]; + } -> (hidden_states_out); + func verify_qK(tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor kv13_k, tensor kv13_v, tensor kv14_k, tensor kv14_v, tensor per_layer_combined, tensor sin_f, tensor sin_s) { + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15731520))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28849088))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28859392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41966656))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41969280)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41974464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302208))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44924032))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44926144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58033408))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(58043712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71150976))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84268544))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84271168)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84276352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604096))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87225920))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87228032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100335296))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100345600))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(113452864))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113463168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126570432))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126573056)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126578240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126905984))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126906304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129527808))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129529920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142637184))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142647488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(155754752))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155765056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168872320))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168874944)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168880128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169207872))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169208192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171829696))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171831808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184939072))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184949376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198056640))))[name = 
string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198066944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211174208))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211176832)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211182016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211509760))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211510080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216753024))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_self_attn_q_norm_weight = const()[name = string("layers_5_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216757184)))]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216758272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229865536))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(229875840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242983104))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242993408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256100672))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256103296)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256108480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436224))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259058048))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259060160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272167424))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(272177728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285284992))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298402560))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298405184)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738112))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301359936))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314469312))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314479616))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(327586880))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327597184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340704448))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340707072)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340712256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040000))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343661824))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343663936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356771200))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356781504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(369888768))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369899072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383006336))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383008960)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383014144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383341888))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + int32 var_449 = const()[name = string("op_449"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_451_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_451_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_449, interleave = input_1_interleave_0, values = (hidden_states, var_451_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_446_to_fp16 = const()[name = string("op_446_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_446_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_456_split_sizes_0 = const()[name = string("op_456_split_sizes_0"), val = tensor([2560, 
2560])]; + int32 var_456_axis_0 = const()[name = string("op_456_axis_0"), val = int32(-1)]; + tensor var_456_cast_fp16_0, tensor var_456_cast_fp16_1 = split(axis = var_456_axis_0, split_sizes = var_456_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_456_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383342208)))]; + tensor h_1_cast_fp16 = mul(x = var_456_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_462 = const()[name = string("op_462"), val = tensor([0, 2, 1])]; + tensor var_465_axes_0 = const()[name = string("op_465_axes_0"), val = tensor([2])]; + tensor var_463_cast_fp16 = transpose(perm = var_462, x = h_1_cast_fp16)[name = string("transpose_110")]; + tensor var_465_cast_fp16 = expand_dims(axes = var_465_axes_0, x = var_463_cast_fp16)[name = string("op_465_cast_fp16")]; + string q_1_pad_type_0 = const()[name = string("q_1_pad_type_0"), val = string("valid")]; + tensor q_1_strides_0 = const()[name = string("q_1_strides_0"), val = tensor([1, 1])]; + tensor q_1_pad_0 = const()[name = string("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_1_dilations_0 = const()[name = string("q_1_dilations_0"), val = tensor([1, 1])]; + int32 q_1_groups_0 = const()[name = string("q_1_groups_0"), val = int32(1)]; + tensor q_1 = conv(dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_465_cast_fp16)[name = string("q_1")]; + tensor var_486 = const()[name = string("op_486"), val = tensor([1, 8, 256, 3])]; + tensor var_487 = reshape(shape = var_486, x = q_1)[name = string("op_487")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([0, 3, 1, 2])]; + 
tensor var_510 = const()[name = string("op_510"), val = tensor([3, 8, 256])]; + tensor transpose_36 = transpose(perm = transpose_36_perm_0, x = var_487)[name = string("transpose_109")]; + tensor x_1 = reshape(shape = var_510, x = transpose_36)[name = string("x_1")]; + int32 var_516 = const()[name = string("op_516"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_518 = mul(x = x_1, y = const_1_promoted)[name = string("op_518")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_516, interleave = input_5_interleave_0, values = (x_1, var_518))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_513_to_fp16 = const()[name = string("op_513_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_513_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_523_split_sizes_0 = const()[name = string("op_523_split_sizes_0"), val = tensor([256, 256])]; + int32 var_523_axis_0 = const()[name = string("op_523_axis_0"), val = int32(-1)]; + tensor var_523_0, tensor var_523_1 = split(axis = var_523_axis_0, split_sizes = var_523_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_523")]; + tensor q_5 = mul(x = var_523_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_5")]; + tensor var_530 = const()[name = string("op_530"), val = tensor([1, 3, 8, 256])]; + tensor var_531 = reshape(shape = var_530, x = q_5)[name = string("op_531")]; + tensor var_536 = const()[name = string("op_536"), val = tensor([0, 2, 1, 3])]; + tensor q_7 = transpose(perm = var_536, x = var_531)[name = string("transpose_108")]; + tensor var_538_cast_fp16 = mul(x = q_7, y = cos_s)[name = string("op_538_cast_fp16")]; + tensor var_539_split_sizes_0 = const()[name = string("op_539_split_sizes_0"), val 
= tensor([128, 128])]; + int32 var_539_axis_0 = const()[name = string("op_539_axis_0"), val = int32(-1)]; + tensor var_539_0, tensor var_539_1 = split(axis = var_539_axis_0, split_sizes = var_539_split_sizes_0, x = q_7)[name = string("op_539")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_541 = mul(x = var_539_1, y = const_2_promoted)[name = string("op_541")]; + int32 var_543 = const()[name = string("op_543"), val = int32(-1)]; + bool var_544_interleave_0 = const()[name = string("op_544_interleave_0"), val = bool(false)]; + tensor var_544 = concat(axis = var_543, interleave = var_544_interleave_0, values = (var_541, var_539_0))[name = string("op_544")]; + tensor var_545_cast_fp16 = mul(x = var_544, y = sin_s)[name = string("op_545_cast_fp16")]; + tensor q_9_cast_fp16 = add(x = var_538_cast_fp16, y = var_545_cast_fp16)[name = string("q_9_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_107")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_106")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = 
string("reshape_1_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_105")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_104")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_103")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_9_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = 
causal_mask_sliding)[name = string("x_3_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")]; + tensor var_577 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_577")]; + tensor var_583 = exp(x = var_577)[name = string("op_583")]; + tensor var_593_axes_0 = const()[name = string("op_593_axes_0"), val = tensor([-1])]; + bool var_593_keep_dims_0 = const()[name = string("op_593_keep_dims_0"), val = bool(true)]; + tensor var_593 = reduce_sum(axes = var_593_axes_0, keep_dims = var_593_keep_dims_0, x = var_583)[name = string("op_593")]; + tensor var_599_cast_fp16 = real_div(x = var_583, y = var_593)[name = string("op_599_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_102")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_599_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_610 = const()[name = string("op_610"), val = tensor([0, 2, 1, 3])]; + tensor var_617 = const()[name = string("op_617"), val = tensor([1, 3, -1])]; + tensor var_611_cast_fp16 = transpose(perm = var_610, x = attn_output_1_cast_fp16)[name = string("transpose_101")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_617, x = var_611_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_622 = const()[name = string("op_622"), val = 
tensor([0, 2, 1])]; + string var_638_pad_type_0 = const()[name = string("op_638_pad_type_0"), val = string("valid")]; + int32 var_638_groups_0 = const()[name = string("op_638_groups_0"), val = int32(1)]; + tensor var_638_strides_0 = const()[name = string("op_638_strides_0"), val = tensor([1])]; + tensor var_638_pad_0 = const()[name = string("op_638_pad_0"), val = tensor([0, 0])]; + tensor var_638_dilations_0 = const()[name = string("op_638_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383347392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385968896))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_623_cast_fp16 = transpose(perm = var_622, x = attn_output_3_cast_fp16)[name = string("transpose_100")]; + tensor var_638_cast_fp16 = conv(dilations = var_638_dilations_0, groups = var_638_groups_0, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_638_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_623_cast_fp16)[name = string("op_638_cast_fp16")]; + tensor var_642 = const()[name = string("op_642"), val = tensor([0, 2, 1])]; + int32 var_648 = const()[name = string("op_648"), val = int32(-1)]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_7_cast_fp16 = transpose(perm = var_642, x = var_638_cast_fp16)[name = string("transpose_99")]; + tensor var_650_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_650_cast_fp16")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9_cast_fp16 = concat(axis = var_648, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_650_cast_fp16))[name = string("input_9_cast_fp16")]; + tensor 
normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_645_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")]; + tensor var_655_split_sizes_0 = const()[name = string("op_655_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_655_axis_0 = const()[name = string("op_655_axis_0"), val = int32(-1)]; + tensor var_655_cast_fp16_0, tensor var_655_cast_fp16_1 = split(axis = var_655_axis_0, split_sizes = var_655_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_655_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385971520)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_655_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")]; + int32 var_664 = const()[name = string("op_664"), val = int32(-1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_666_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_666_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_664, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_666_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_661_to_fp16 = const()[name = string("op_661_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_661_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_671_split_sizes_0 = const()[name = string("op_671_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_671_axis_0 = const()[name = string("op_671_axis_0"), val = int32(-1)]; + tensor var_671_cast_fp16_0, tensor var_671_cast_fp16_1 = split(axis = var_671_axis_0, split_sizes = var_671_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_671_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385976704)))]; + tensor h_3_cast_fp16 = mul(x = var_671_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_682 = const()[name = string("op_682"), val = tensor([0, 2, 1])]; + tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_683 = transpose(perm = var_682, x = h_3_cast_fp16)[name = string("transpose_98")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_683)[name = string("input_13")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = 
string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")]; + tensor var_723_axes_0 = const()[name = string("op_723_axes_0"), val = tensor([2])]; + tensor var_723 = squeeze(axes = var_723_axes_0, x = mlp_out_1)[name = string("op_723")]; + tensor var_727 = const()[name = string("op_727"), val = tensor([0, 2, 1])]; + int32 var_733 = 
const()[name = string("op_733"), val = int32(-1)]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor x_11 = transpose(perm = var_727, x = var_723)[name = string("transpose_97")]; + tensor var_735 = mul(x = x_11, y = const_5_promoted)[name = string("op_735")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17 = concat(axis = var_733, interleave = input_17_interleave_0, values = (x_11, var_735))[name = string("input_17")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_730_to_fp16 = const()[name = string("op_730_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_730_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")]; + tensor var_740_split_sizes_0 = const()[name = string("op_740_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_740_axis_0 = const()[name = string("op_740_axis_0"), val = int32(-1)]; + tensor var_740_0, tensor var_740_1 = split(axis = var_740_axis_0, split_sizes = var_740_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_740")]; + tensor hidden_states_3 = mul(x = var_740_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 6144])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 3, 6400])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x 
= per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_768 = const()[name = string("op_768"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_769 = transpose(perm = var_768, x = hidden_states_5_cast_fp16)[name = string("transpose_96")]; + tensor input_19 = expand_dims(axes = input_19_axes_0, x = var_769)[name = string("input_19")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_788 = const()[name = string("op_788"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_789_cast_fp16 = transpose(perm = var_788, x = per_layer_slice_1_cast_fp16)[name = string("transpose_95")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_789_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")]; + string 
gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385981888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386309632))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_805_axes_0 = const()[name = string("op_805_axes_0"), val = tensor([2])]; + tensor var_805_cast_fp16 = squeeze(axes = var_805_axes_0, x = gated_5_cast_fp16)[name = string("op_805_cast_fp16")]; + tensor var_809 = const()[name = string("op_809"), val = tensor([0, 2, 1])]; + int32 var_815 = const()[name = string("op_815"), val = int32(-1)]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_13_cast_fp16 = transpose(perm = var_809, x = var_805_cast_fp16)[name = string("transpose_94")]; + tensor var_817_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_817_cast_fp16")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23_cast_fp16 = concat(axis = 
var_815, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_817_cast_fp16))[name = string("input_23_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_812_to_fp16 = const()[name = string("op_812_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_812_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_822_split_sizes_0 = const()[name = string("op_822_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_822_axis_0 = const()[name = string("op_822_axis_0"), val = int32(-1)]; + tensor var_822_cast_fp16_0, tensor var_822_cast_fp16_1 = split(axis = var_822_axis_0, split_sizes = var_822_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_822_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386312256)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_822_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor([0x1.02p-1])]; + tensor x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")]; + int32 var_837 = const()[name = string("op_837"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_839_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_839_cast_fp16")]; + bool input_25_interleave_0 = 
const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25_cast_fp16 = concat(axis = var_837, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_839_cast_fp16))[name = string("input_25_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_834_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_844_split_sizes_0 = const()[name = string("op_844_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_844_axis_0 = const()[name = string("op_844_axis_0"), val = int32(-1)]; + tensor var_844_cast_fp16_0, tensor var_844_cast_fp16_1 = split(axis = var_844_axis_0, split_sizes = var_844_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_844_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386317440)))]; + tensor h_7_cast_fp16 = mul(x = var_844_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_850 = const()[name = string("op_850"), val = tensor([0, 2, 1])]; + tensor var_853_axes_0 = const()[name = string("op_853_axes_0"), val = tensor([2])]; + tensor var_851_cast_fp16 = transpose(perm = var_850, x = h_7_cast_fp16)[name = string("transpose_93")]; + tensor var_853_cast_fp16 = expand_dims(axes = var_853_axes_0, x = var_851_cast_fp16)[name = string("op_853_cast_fp16")]; + string q_11_pad_type_0 = const()[name = string("q_11_pad_type_0"), val = string("valid")]; + tensor q_11_strides_0 = const()[name = string("q_11_strides_0"), val = tensor([1, 1])]; + tensor q_11_pad_0 = const()[name = string("q_11_pad_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor q_11_dilations_0 = const()[name = string("q_11_dilations_0"), val = tensor([1, 1])]; + int32 q_11_groups_0 = const()[name = string("q_11_groups_0"), val = int32(1)]; + tensor q_11 = conv(dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_853_cast_fp16)[name = string("q_11")]; + tensor var_874 = const()[name = string("op_874"), val = tensor([1, 8, 256, 3])]; + tensor var_875 = reshape(shape = var_874, x = q_11)[name = string("op_875")]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_898 = const()[name = string("op_898"), val = tensor([3, 8, 256])]; + tensor transpose_38 = transpose(perm = transpose_38_perm_0, x = var_875)[name = string("transpose_92")]; + tensor x_17 = reshape(shape = var_898, x = transpose_38)[name = string("x_17")]; + int32 var_904 = const()[name = string("op_904"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor var_906 = mul(x = x_17, y = const_9_promoted)[name = string("op_906")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29 = concat(axis = var_904, interleave = input_29_interleave_0, values = (x_17, var_906))[name = string("input_29")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_901_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")]; + tensor var_911_split_sizes_0 = const()[name = string("op_911_split_sizes_0"), val = tensor([256, 256])]; + int32 var_911_axis_0 = const()[name = string("op_911_axis_0"), val = int32(-1)]; + tensor var_911_0, tensor var_911_1 = 
split(axis = var_911_axis_0, split_sizes = var_911_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_911")]; + tensor q_15 = mul(x = var_911_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_15")]; + tensor var_918 = const()[name = string("op_918"), val = tensor([1, 3, 8, 256])]; + tensor var_919 = reshape(shape = var_918, x = q_15)[name = string("op_919")]; + tensor var_924 = const()[name = string("op_924"), val = tensor([0, 2, 1, 3])]; + tensor q_17 = transpose(perm = var_924, x = var_919)[name = string("transpose_91")]; + tensor var_926_cast_fp16 = mul(x = q_17, y = cos_s)[name = string("op_926_cast_fp16")]; + tensor var_927_split_sizes_0 = const()[name = string("op_927_split_sizes_0"), val = tensor([128, 128])]; + int32 var_927_axis_0 = const()[name = string("op_927_axis_0"), val = int32(-1)]; + tensor var_927_0, tensor var_927_1 = split(axis = var_927_axis_0, split_sizes = var_927_split_sizes_0, x = q_17)[name = string("op_927")]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor var_929 = mul(x = var_927_1, y = const_10_promoted)[name = string("op_929")]; + int32 var_931 = const()[name = string("op_931"), val = int32(-1)]; + bool var_932_interleave_0 = const()[name = string("op_932_interleave_0"), val = bool(false)]; + tensor var_932 = concat(axis = var_931, interleave = var_932_interleave_0, values = (var_929, var_927_0))[name = string("op_932")]; + tensor var_933_cast_fp16 = mul(x = var_932, y = sin_s)[name = string("op_933_cast_fp16")]; + tensor q_19_cast_fp16 = add(x = var_926_cast_fp16, y = var_933_cast_fp16)[name = string("q_19_cast_fp16")]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = 
attn_weights_5_transpose_y_0, x = q_19_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")]; + tensor var_965 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_965")]; + tensor var_971 = exp(x = var_965)[name = string("op_971")]; + tensor var_981_axes_0 = const()[name = string("op_981_axes_0"), val = tensor([-1])]; + bool var_981_keep_dims_0 = const()[name = string("op_981_keep_dims_0"), val = bool(true)]; + tensor var_981 = reduce_sum(axes = var_981_axes_0, keep_dims = var_981_keep_dims_0, x = var_971)[name = string("op_981")]; + tensor var_987_cast_fp16 = real_div(x = var_971, y = var_981)[name = string("op_987_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_987_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_998 = const()[name = string("op_998"), val = tensor([0, 2, 1, 3])]; + tensor var_1005 = const()[name = string("op_1005"), val = tensor([1, 3, -1])]; + tensor var_999_cast_fp16 = transpose(perm = var_998, x = attn_output_7_cast_fp16)[name = string("transpose_90")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_1005, x = var_999_cast_fp16)[name = string("attn_output_9_cast_fp16")]; 
+ tensor var_1010 = const()[name = string("op_1010"), val = tensor([0, 2, 1])]; + string var_1026_pad_type_0 = const()[name = string("op_1026_pad_type_0"), val = string("valid")]; + int32 var_1026_groups_0 = const()[name = string("op_1026_groups_0"), val = int32(1)]; + tensor var_1026_strides_0 = const()[name = string("op_1026_strides_0"), val = tensor([1])]; + tensor var_1026_pad_0 = const()[name = string("op_1026_pad_0"), val = tensor([0, 0])]; + tensor var_1026_dilations_0 = const()[name = string("op_1026_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386322624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388944128))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1011_cast_fp16 = transpose(perm = var_1010, x = attn_output_9_cast_fp16)[name = string("transpose_89")]; + tensor var_1026_cast_fp16 = conv(dilations = var_1026_dilations_0, groups = var_1026_groups_0, pad = var_1026_pad_0, pad_type = var_1026_pad_type_0, strides = var_1026_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1011_cast_fp16)[name = string("op_1026_cast_fp16")]; + tensor var_1030 = const()[name = string("op_1030"), val = tensor([0, 2, 1])]; + int32 var_1036 = const()[name = string("op_1036"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_23_cast_fp16 = transpose(perm = var_1030, x = var_1026_cast_fp16)[name = string("transpose_88")]; + tensor var_1038_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1038_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_1036, interleave = input_33_interleave_0, 
values = (x_23_cast_fp16, var_1038_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1033_to_fp16 = const()[name = string("op_1033_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1033_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1043_split_sizes_0 = const()[name = string("op_1043_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1043_axis_0 = const()[name = string("op_1043_axis_0"), val = int32(-1)]; + tensor var_1043_cast_fp16_0, tensor var_1043_cast_fp16_1 = split(axis = var_1043_axis_0, split_sizes = var_1043_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1043_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388946752)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1043_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_1052 = const()[name = string("op_1052"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1054_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1054_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1052, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1054_cast_fp16))[name = string("input_35_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = 
string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1049_to_fp16 = const()[name = string("op_1049_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1049_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor var_1059_split_sizes_0 = const()[name = string("op_1059_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1059_axis_0 = const()[name = string("op_1059_axis_0"), val = int32(-1)]; + tensor var_1059_cast_fp16_0, tensor var_1059_cast_fp16_1 = split(axis = var_1059_axis_0, split_sizes = var_1059_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1059_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388951936)))]; + tensor h_9_cast_fp16 = mul(x = var_1059_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1070 = const()[name = string("op_1070"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_1071 = transpose(perm = var_1070, x = h_9_cast_fp16)[name = string("transpose_87")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1071)[name = string("input_37")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = 
gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")]; + tensor var_1111_axes_0 = const()[name = string("op_1111_axes_0"), val = tensor([2])]; + tensor var_1111 = 
squeeze(axes = var_1111_axes_0, x = mlp_out_3)[name = string("op_1111")]; + tensor var_1115 = const()[name = string("op_1115"), val = tensor([0, 2, 1])]; + int32 var_1121 = const()[name = string("op_1121"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor x_27 = transpose(perm = var_1115, x = var_1111)[name = string("transpose_86")]; + tensor var_1123 = mul(x = x_27, y = const_13_promoted)[name = string("op_1123")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1121, interleave = input_41_interleave_0, values = (x_27, var_1123))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1118_to_fp16 = const()[name = string("op_1118_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1118_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1128_split_sizes_0 = const()[name = string("op_1128_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1128_axis_0 = const()[name = string("op_1128_axis_0"), val = int32(-1)]; + tensor var_1128_0, tensor var_1128_1 = split(axis = var_1128_axis_0, split_sizes = var_1128_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1128")]; + tensor hidden_states_13 = mul(x = var_1128_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 6400])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 3, 6656])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = 
string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1156 = const()[name = string("op_1156"), val = tensor([0, 2, 1])]; + tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; + tensor var_1157 = transpose(perm = var_1156, x = hidden_states_15_cast_fp16)[name = string("transpose_85")]; + tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1157)[name = string("input_43")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1176 = const()[name = string("op_1176"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1177_cast_fp16 = transpose(perm = var_1176, x = per_layer_slice_3_cast_fp16)[name = string("transpose_84")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = 
per_layer_slice_conv_3_axes_0, x = var_1177_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388957120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389284864))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1193_axes_0 = const()[name = string("op_1193_axes_0"), val = tensor([2])]; + tensor var_1193_cast_fp16 = squeeze(axes = var_1193_axes_0, x = gated_11_cast_fp16)[name = string("op_1193_cast_fp16")]; + tensor var_1197 = const()[name = string("op_1197"), val = tensor([0, 2, 1])]; + int32 var_1203 = const()[name = string("op_1203"), val = int32(-1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_29_cast_fp16 = transpose(perm = var_1197, x = var_1193_cast_fp16)[name = 
string("transpose_83")]; + tensor var_1205_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1205_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_1203, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1205_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1200_to_fp16 = const()[name = string("op_1200_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1200_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1210_split_sizes_0 = const()[name = string("op_1210_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1210_axis_0 = const()[name = string("op_1210_axis_0"), val = int32(-1)]; + tensor var_1210_cast_fp16_0, tensor var_1210_cast_fp16_1 = split(axis = var_1210_axis_0, split_sizes = var_1210_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1210_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389287488)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1210_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor([0x1.6ep-1])]; + tensor x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")]; + int32 var_1225 = 
const()[name = string("op_1225"), val = int32(-1)]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1227_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1227_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1225, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1227_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1222_to_fp16 = const()[name = string("op_1222_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1222_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1232_split_sizes_0 = const()[name = string("op_1232_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1232_axis_0 = const()[name = string("op_1232_axis_0"), val = int32(-1)]; + tensor var_1232_cast_fp16_0, tensor var_1232_cast_fp16_1 = split(axis = var_1232_axis_0, split_sizes = var_1232_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1232_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389292672)))]; + tensor h_13_cast_fp16 = mul(x = var_1232_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1238 = const()[name = string("op_1238"), val = tensor([0, 2, 1])]; + tensor var_1241_axes_0 = const()[name = string("op_1241_axes_0"), val = tensor([2])]; + tensor var_1239_cast_fp16 = transpose(perm = var_1238, x = h_13_cast_fp16)[name = string("transpose_82")]; + tensor var_1241_cast_fp16 = expand_dims(axes = 
var_1241_axes_0, x = var_1239_cast_fp16)[name = string("op_1241_cast_fp16")]; + string q_21_pad_type_0 = const()[name = string("q_21_pad_type_0"), val = string("valid")]; + tensor q_21_strides_0 = const()[name = string("q_21_strides_0"), val = tensor([1, 1])]; + tensor q_21_pad_0 = const()[name = string("q_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_21_dilations_0 = const()[name = string("q_21_dilations_0"), val = tensor([1, 1])]; + int32 q_21_groups_0 = const()[name = string("q_21_groups_0"), val = int32(1)]; + tensor q_21 = conv(dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1241_cast_fp16)[name = string("q_21")]; + tensor var_1262 = const()[name = string("op_1262"), val = tensor([1, 8, 256, 3])]; + tensor var_1263 = reshape(shape = var_1262, x = q_21)[name = string("op_1263")]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_1286 = const()[name = string("op_1286"), val = tensor([3, 8, 256])]; + tensor transpose_40 = transpose(perm = transpose_40_perm_0, x = var_1263)[name = string("transpose_81")]; + tensor x_33 = reshape(shape = var_1286, x = transpose_40)[name = string("x_33")]; + int32 var_1292 = const()[name = string("op_1292"), val = int32(-1)]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1294 = mul(x = x_33, y = const_17_promoted)[name = string("op_1294")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1292, interleave = input_53_interleave_0, values = (x_33, var_1294))[name = string("input_53")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1289_to_fp16 = const()[name = string("op_1289_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1289_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")]; + tensor var_1299_split_sizes_0 = const()[name = string("op_1299_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1299_axis_0 = const()[name = string("op_1299_axis_0"), val = int32(-1)]; + tensor var_1299_0, tensor var_1299_1 = split(axis = var_1299_axis_0, split_sizes = var_1299_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1299")]; + tensor q_25 = mul(x = var_1299_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_25")]; + tensor var_1306 = const()[name = string("op_1306"), val = tensor([1, 3, 8, 256])]; + tensor var_1307 = reshape(shape = var_1306, x = q_25)[name = string("op_1307")]; + tensor var_1312 = const()[name = string("op_1312"), val = tensor([0, 2, 1, 3])]; + tensor q_27 = transpose(perm = var_1312, x = var_1307)[name = string("transpose_80")]; + tensor var_1314_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_1314_cast_fp16")]; + tensor var_1315_split_sizes_0 = const()[name = string("op_1315_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1315_axis_0 = const()[name = string("op_1315_axis_0"), val = int32(-1)]; + tensor var_1315_0, tensor var_1315_1 = split(axis = var_1315_axis_0, split_sizes = var_1315_split_sizes_0, x = q_27)[name = string("op_1315")]; + fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; + tensor var_1317 = mul(x = var_1315_1, y = const_18_promoted)[name = string("op_1317")]; + int32 var_1319 = const()[name = string("op_1319"), val = int32(-1)]; + bool var_1320_interleave_0 = const()[name = string("op_1320_interleave_0"), val = bool(false)]; + tensor var_1320 = concat(axis = var_1319, interleave = var_1320_interleave_0, values = (var_1317, var_1315_0))[name = string("op_1320")]; + tensor var_1321_cast_fp16 = mul(x = var_1320, y = sin_s)[name = string("op_1321_cast_fp16")]; + tensor q_29_cast_fp16 = add(x = 
var_1314_cast_fp16, y = var_1321_cast_fp16)[name = string("q_29_cast_fp16")]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_29_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_sliding)[name = string("x_35_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")]; + tensor var_1353 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1353")]; + tensor var_1359 = exp(x = var_1353)[name = string("op_1359")]; + tensor var_1369_axes_0 = const()[name = string("op_1369_axes_0"), val = tensor([-1])]; + bool var_1369_keep_dims_0 = const()[name = string("op_1369_keep_dims_0"), val = bool(true)]; + tensor var_1369 = reduce_sum(axes = var_1369_axes_0, keep_dims = var_1369_keep_dims_0, x = var_1359)[name = string("op_1369")]; + tensor var_1375_cast_fp16 = real_div(x = var_1359, y = var_1369)[name = string("op_1375_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1375_cast_fp16, y = V_expanded_1_cast_fp16)[name = 
string("attn_output_13_cast_fp16")]; + tensor var_1386 = const()[name = string("op_1386"), val = tensor([0, 2, 1, 3])]; + tensor var_1393 = const()[name = string("op_1393"), val = tensor([1, 3, -1])]; + tensor var_1387_cast_fp16 = transpose(perm = var_1386, x = attn_output_13_cast_fp16)[name = string("transpose_79")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_1393, x = var_1387_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_1398 = const()[name = string("op_1398"), val = tensor([0, 2, 1])]; + string var_1414_pad_type_0 = const()[name = string("op_1414_pad_type_0"), val = string("valid")]; + int32 var_1414_groups_0 = const()[name = string("op_1414_groups_0"), val = int32(1)]; + tensor var_1414_strides_0 = const()[name = string("op_1414_strides_0"), val = tensor([1])]; + tensor var_1414_pad_0 = const()[name = string("op_1414_pad_0"), val = tensor([0, 0])]; + tensor var_1414_dilations_0 = const()[name = string("op_1414_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389297856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391919360))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1399_cast_fp16 = transpose(perm = var_1398, x = attn_output_15_cast_fp16)[name = string("transpose_78")]; + tensor var_1414_cast_fp16 = conv(dilations = var_1414_dilations_0, groups = var_1414_groups_0, pad = var_1414_pad_0, pad_type = var_1414_pad_type_0, strides = var_1414_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1399_cast_fp16)[name = string("op_1414_cast_fp16")]; + tensor var_1418 = const()[name = string("op_1418"), val = tensor([0, 2, 1])]; + int32 var_1424 = const()[name = string("op_1424"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = 
string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_39_cast_fp16 = transpose(perm = var_1418, x = var_1414_cast_fp16)[name = string("transpose_77")]; + tensor var_1426_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1426_cast_fp16")]; + bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57_cast_fp16 = concat(axis = var_1424, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1426_cast_fp16))[name = string("input_57_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1421_to_fp16 = const()[name = string("op_1421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1421_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1431_split_sizes_0 = const()[name = string("op_1431_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1431_axis_0 = const()[name = string("op_1431_axis_0"), val = int32(-1)]; + tensor var_1431_cast_fp16_0, tensor var_1431_cast_fp16_1 = split(axis = var_1431_axis_0, split_sizes = var_1431_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1431_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391921984)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_1431_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")]; + int32 var_1440 = const()[name = string("op_1440"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = 
fp16(-0x1p+0)]; + tensor var_1442_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1442_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_1440, interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1442_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1437_to_fp16 = const()[name = string("op_1437_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1437_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_1447_split_sizes_0 = const()[name = string("op_1447_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1447_axis_0 = const()[name = string("op_1447_axis_0"), val = int32(-1)]; + tensor var_1447_cast_fp16_0, tensor var_1447_cast_fp16_1 = split(axis = var_1447_axis_0, split_sizes = var_1447_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1447_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391927168)))]; + tensor h_15_cast_fp16 = mul(x = var_1447_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_1458 = const()[name = string("op_1458"), val = tensor([0, 2, 1])]; + tensor input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor([2])]; + tensor var_1459 = transpose(perm = var_1458, x = h_15_cast_fp16)[name = string("transpose_76")]; + tensor input_61 = expand_dims(axes = input_61_axes_0, x = var_1459)[name = string("input_61")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = 
string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 
mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")]; + tensor var_1499_axes_0 = const()[name = string("op_1499_axes_0"), val = tensor([2])]; + tensor var_1499 = squeeze(axes = var_1499_axes_0, x = mlp_out_5)[name = string("op_1499")]; + tensor var_1503 = const()[name = string("op_1503"), val = tensor([0, 2, 1])]; + int32 var_1509 = const()[name = string("op_1509"), val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_43 = transpose(perm = var_1503, x = var_1499)[name = string("transpose_75")]; + tensor var_1511 = mul(x = x_43, y = const_21_promoted)[name = string("op_1511")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_1509, interleave = input_65_interleave_0, values = (x_43, var_1511))[name = string("input_65")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_1506_to_fp16 = const()[name = string("op_1506_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1506_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")]; + tensor var_1516_split_sizes_0 = const()[name = string("op_1516_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1516_axis_0 = const()[name = string("op_1516_axis_0"), val = int32(-1)]; + tensor var_1516_0, tensor var_1516_1 = split(axis = var_1516_axis_0, split_sizes = var_1516_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1516")]; + tensor hidden_states_23 = mul(x = var_1516_0, y = layers_2_post_feedforward_layernorm_weight)[name = 
string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 6656])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 3, 6912])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_1544 = const()[name = string("op_1544"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_1545 = transpose(perm = var_1544, x = hidden_states_25_cast_fp16)[name = string("transpose_74")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_1545)[name = string("input_67")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + 
tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_1565_cast_fp16 = transpose(perm = var_1564, x = per_layer_slice_5_cast_fp16)[name = string("transpose_73")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1565_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391932352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392260096))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_1581_axes_0 = const()[name = string("op_1581_axes_0"), val = tensor([2])]; + tensor 
var_1581_cast_fp16 = squeeze(axes = var_1581_axes_0, x = gated_17_cast_fp16)[name = string("op_1581_cast_fp16")]; + tensor var_1585 = const()[name = string("op_1585"), val = tensor([0, 2, 1])]; + int32 var_1591 = const()[name = string("op_1591"), val = int32(-1)]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_45_cast_fp16 = transpose(perm = var_1585, x = var_1581_cast_fp16)[name = string("transpose_72")]; + tensor var_1593_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1593_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_1591, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1593_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_1588_to_fp16 = const()[name = string("op_1588_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1588_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_1598_split_sizes_0 = const()[name = string("op_1598_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1598_axis_0 = const()[name = string("op_1598_axis_0"), val = int32(-1)]; + tensor var_1598_cast_fp16_0, tensor var_1598_cast_fp16_1 = split(axis = var_1598_axis_0, split_sizes = var_1598_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1598_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392262720)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_1598_cast_fp16_0, y = 
layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor([0x1.6ep-1])]; + tensor x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")]; + int32 var_1613 = const()[name = string("op_1613"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1615_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1615_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_1613, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1615_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1610_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_1620_split_sizes_0 = const()[name = string("op_1620_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1620_axis_0 = const()[name = string("op_1620_axis_0"), val = int32(-1)]; + tensor var_1620_cast_fp16_0, tensor var_1620_cast_fp16_1 = split(axis = var_1620_axis_0, split_sizes = var_1620_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1620_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(392267904)))]; + tensor h_19_cast_fp16 = mul(x = var_1620_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_1626 = const()[name = string("op_1626"), val = tensor([0, 2, 1])]; + tensor var_1629_axes_0 = const()[name = string("op_1629_axes_0"), val = tensor([2])]; + tensor var_1627_cast_fp16 = transpose(perm = var_1626, x = h_19_cast_fp16)[name = string("transpose_71")]; + tensor var_1629_cast_fp16 = expand_dims(axes = var_1629_axes_0, x = var_1627_cast_fp16)[name = string("op_1629_cast_fp16")]; + string q_31_pad_type_0 = const()[name = string("q_31_pad_type_0"), val = string("valid")]; + tensor q_31_strides_0 = const()[name = string("q_31_strides_0"), val = tensor([1, 1])]; + tensor q_31_pad_0 = const()[name = string("q_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_31_dilations_0 = const()[name = string("q_31_dilations_0"), val = tensor([1, 1])]; + int32 q_31_groups_0 = const()[name = string("q_31_groups_0"), val = int32(1)]; + tensor q_31 = conv(dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1629_cast_fp16)[name = string("q_31")]; + tensor var_1650 = const()[name = string("op_1650"), val = tensor([1, 8, 256, 3])]; + tensor var_1651 = reshape(shape = var_1650, x = q_31)[name = string("op_1651")]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_1674 = const()[name = string("op_1674"), val = tensor([3, 8, 256])]; + tensor transpose_42 = transpose(perm = transpose_42_perm_0, x = var_1651)[name = string("transpose_70")]; + tensor x_49 = reshape(shape = var_1674, x = transpose_42)[name = string("x_49")]; + int32 var_1680 = const()[name = string("op_1680"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = 
string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_1682 = mul(x = x_49, y = const_25_promoted)[name = string("op_1682")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_1680, interleave = input_77_interleave_0, values = (x_49, var_1682))[name = string("input_77")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_1677_to_fp16 = const()[name = string("op_1677_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1677_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")]; + tensor var_1687_split_sizes_0 = const()[name = string("op_1687_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1687_axis_0 = const()[name = string("op_1687_axis_0"), val = int32(-1)]; + tensor var_1687_0, tensor var_1687_1 = split(axis = var_1687_axis_0, split_sizes = var_1687_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1687")]; + tensor q_35 = mul(x = var_1687_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_35")]; + tensor var_1694 = const()[name = string("op_1694"), val = tensor([1, 3, 8, 256])]; + tensor var_1695 = reshape(shape = var_1694, x = q_35)[name = string("op_1695")]; + tensor var_1700 = const()[name = string("op_1700"), val = tensor([0, 2, 1, 3])]; + tensor q_37 = transpose(perm = var_1700, x = var_1695)[name = string("transpose_69")]; + tensor var_1702_cast_fp16 = mul(x = q_37, y = cos_s)[name = string("op_1702_cast_fp16")]; + tensor var_1703_split_sizes_0 = const()[name = string("op_1703_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1703_axis_0 = const()[name = string("op_1703_axis_0"), val = int32(-1)]; + tensor var_1703_0, tensor var_1703_1 = split(axis = var_1703_axis_0, split_sizes = var_1703_split_sizes_0, x = q_37)[name = string("op_1703")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), 
val = fp16(-0x1p+0)]; + tensor var_1705 = mul(x = var_1703_1, y = const_26_promoted)[name = string("op_1705")]; + int32 var_1707 = const()[name = string("op_1707"), val = int32(-1)]; + bool var_1708_interleave_0 = const()[name = string("op_1708_interleave_0"), val = bool(false)]; + tensor var_1708 = concat(axis = var_1707, interleave = var_1708_interleave_0, values = (var_1705, var_1703_0))[name = string("op_1708")]; + tensor var_1709_cast_fp16 = mul(x = var_1708, y = sin_s)[name = string("op_1709_cast_fp16")]; + tensor q_39_cast_fp16 = add(x = var_1702_cast_fp16, y = var_1709_cast_fp16)[name = string("q_39_cast_fp16")]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_39_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")]; + tensor var_1741 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1741")]; + tensor var_1747 = exp(x = var_1741)[name = string("op_1747")]; + tensor var_1757_axes_0 = const()[name = string("op_1757_axes_0"), val = tensor([-1])]; + bool var_1757_keep_dims_0 = const()[name = string("op_1757_keep_dims_0"), val = bool(true)]; + tensor var_1757 = reduce_sum(axes = var_1757_axes_0, keep_dims = var_1757_keep_dims_0, x = 
var_1747)[name = string("op_1757")]; + tensor var_1763_cast_fp16 = real_div(x = var_1747, y = var_1757)[name = string("op_1763_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_1763_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_1774 = const()[name = string("op_1774"), val = tensor([0, 2, 1, 3])]; + tensor var_1781 = const()[name = string("op_1781"), val = tensor([1, 3, -1])]; + tensor var_1775_cast_fp16 = transpose(perm = var_1774, x = attn_output_19_cast_fp16)[name = string("transpose_68")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_1781, x = var_1775_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1786 = const()[name = string("op_1786"), val = tensor([0, 2, 1])]; + string var_1802_pad_type_0 = const()[name = string("op_1802_pad_type_0"), val = string("valid")]; + int32 var_1802_groups_0 = const()[name = string("op_1802_groups_0"), val = int32(1)]; + tensor var_1802_strides_0 = const()[name = string("op_1802_strides_0"), val = tensor([1])]; + tensor var_1802_pad_0 = const()[name = string("op_1802_pad_0"), val = tensor([0, 0])]; + tensor var_1802_dilations_0 = const()[name = string("op_1802_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392273088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394894592))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1787_cast_fp16 = transpose(perm = var_1786, x = 
attn_output_21_cast_fp16)[name = string("transpose_67")]; + tensor var_1802_cast_fp16 = conv(dilations = var_1802_dilations_0, groups = var_1802_groups_0, pad = var_1802_pad_0, pad_type = var_1802_pad_type_0, strides = var_1802_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1787_cast_fp16)[name = string("op_1802_cast_fp16")]; + tensor var_1806 = const()[name = string("op_1806"), val = tensor([0, 2, 1])]; + int32 var_1812 = const()[name = string("op_1812"), val = int32(-1)]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_55_cast_fp16 = transpose(perm = var_1806, x = var_1802_cast_fp16)[name = string("transpose_66")]; + tensor var_1814_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1814_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_1812, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1814_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_1809_to_fp16 = const()[name = string("op_1809_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1809_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_1819_split_sizes_0 = const()[name = string("op_1819_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1819_axis_0 = const()[name = string("op_1819_axis_0"), val = int32(-1)]; + tensor var_1819_cast_fp16_0, tensor var_1819_cast_fp16_1 = split(axis = var_1819_axis_0, split_sizes = var_1819_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1819_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394897216)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_1819_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")]; + int32 var_1828 = const()[name = string("op_1828"), val = int32(-1)]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1830_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1830_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_1828, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1830_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1825_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_1835_split_sizes_0 = const()[name = string("op_1835_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1835_axis_0 = const()[name = string("op_1835_axis_0"), val = int32(-1)]; + tensor var_1835_cast_fp16_0, tensor var_1835_cast_fp16_1 = split(axis = var_1835_axis_0, split_sizes = var_1835_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1835_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(394902400)))]; + tensor h_21_cast_fp16 = mul(x = var_1835_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_1846 = const()[name = string("op_1846"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_1847 = transpose(perm = var_1846, x = h_21_cast_fp16)[name = string("transpose_65")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_1847)[name = string("input_85")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = 
string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")]; + tensor var_1887_axes_0 = const()[name = string("op_1887_axes_0"), val = tensor([2])]; + tensor var_1887 = squeeze(axes = var_1887_axes_0, x = mlp_out_7)[name = string("op_1887")]; + tensor var_1891 = const()[name = string("op_1891"), val = tensor([0, 2, 1])]; + int32 var_1897 = const()[name = string("op_1897"), val = int32(-1)]; + fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor x_59 = transpose(perm = var_1891, x = var_1887)[name = string("transpose_64")]; + tensor var_1899 = mul(x = x_59, y = const_29_promoted)[name = string("op_1899")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89 = concat(axis = var_1897, interleave = input_89_interleave_0, values = (x_59, var_1899))[name = string("input_89")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_1894_to_fp16 = const()[name = string("op_1894_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1894_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")]; + tensor var_1904_split_sizes_0 = const()[name = string("op_1904_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1904_axis_0 = const()[name = string("op_1904_axis_0"), val = int32(-1)]; + tensor var_1904_0, tensor var_1904_1 = split(axis = var_1904_axis_0, split_sizes = var_1904_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1904")]; + tensor hidden_states_33 = mul(x = var_1904_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 6912])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 3, 7168])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_1932 = const()[name = string("op_1932"), val = tensor([0, 2, 1])]; + tensor input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor([2])]; + tensor var_1933 = transpose(perm = var_1932, x = hidden_states_35_cast_fp16)[name = string("transpose_63")]; + tensor input_91 = expand_dims(axes = input_91_axes_0, x = var_1933)[name = string("input_91")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = 
string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_1952 = const()[name = string("op_1952"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_1953_cast_fp16 = transpose(perm = var_1952, x = per_layer_slice_7_cast_fp16)[name = string("transpose_62")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1953_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(394907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395235328))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_1969_axes_0 = const()[name = string("op_1969_axes_0"), val = tensor([2])]; + tensor var_1969_cast_fp16 = squeeze(axes = var_1969_axes_0, x = gated_23_cast_fp16)[name = string("op_1969_cast_fp16")]; + tensor var_1973 = const()[name = string("op_1973"), val = tensor([0, 2, 1])]; + int32 var_1979 = const()[name = string("op_1979"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_61_cast_fp16 = transpose(perm = var_1973, x = var_1969_cast_fp16)[name = string("transpose_61")]; + tensor var_1981_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1981_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95_cast_fp16 = concat(axis = var_1979, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1981_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_1976_to_fp16 = const()[name = string("op_1976_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1976_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor var_1986_split_sizes_0 = const()[name = string("op_1986_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1986_axis_0 = const()[name = 
string("op_1986_axis_0"), val = int32(-1)]; + tensor var_1986_cast_fp16_0, tensor var_1986_cast_fp16_1 = split(axis = var_1986_axis_0, split_sizes = var_1986_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1986_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395237952)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_1986_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor([0x1.62p-1])]; + tensor x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")]; + int32 var_2001 = const()[name = string("op_2001"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2003_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2003_cast_fp16")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97_cast_fp16 = concat(axis = var_2001, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_2003_cast_fp16))[name = string("input_97_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_1998_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor 
var_2008_split_sizes_0 = const()[name = string("op_2008_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2008_axis_0 = const()[name = string("op_2008_axis_0"), val = int32(-1)]; + tensor var_2008_cast_fp16_0, tensor var_2008_cast_fp16_1 = split(axis = var_2008_axis_0, split_sizes = var_2008_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_2008_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395243136)))]; + tensor h_25_cast_fp16 = mul(x = var_2008_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_2014 = const()[name = string("op_2014"), val = tensor([0, 2, 1])]; + tensor var_2017_axes_0 = const()[name = string("op_2017_axes_0"), val = tensor([2])]; + tensor var_2015_cast_fp16 = transpose(perm = var_2014, x = h_25_cast_fp16)[name = string("transpose_60")]; + tensor var_2017_cast_fp16 = expand_dims(axes = var_2017_axes_0, x = var_2015_cast_fp16)[name = string("op_2017_cast_fp16")]; + string q_41_pad_type_0 = const()[name = string("q_41_pad_type_0"), val = string("valid")]; + tensor q_41_strides_0 = const()[name = string("q_41_strides_0"), val = tensor([1, 1])]; + tensor q_41_pad_0 = const()[name = string("q_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_41_dilations_0 = const()[name = string("q_41_dilations_0"), val = tensor([1, 1])]; + int32 q_41_groups_0 = const()[name = string("q_41_groups_0"), val = int32(1)]; + tensor q_41 = conv(dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_2017_cast_fp16)[name = string("q_41")]; + tensor var_2038 = const()[name = string("op_2038"), val = tensor([1, 8, 256, 3])]; + tensor var_2039 = reshape(shape = var_2038, x = 
q_41)[name = string("op_2039")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2062 = const()[name = string("op_2062"), val = tensor([3, 8, 256])]; + tensor transpose_44 = transpose(perm = transpose_44_perm_0, x = var_2039)[name = string("transpose_59")]; + tensor x_65 = reshape(shape = var_2062, x = transpose_44)[name = string("x_65")]; + int32 var_2068 = const()[name = string("op_2068"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor var_2070 = mul(x = x_65, y = const_33_promoted)[name = string("op_2070")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2068, interleave = input_101_interleave_0, values = (x_65, var_2070))[name = string("input_101")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2065_to_fp16 = const()[name = string("op_2065_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2065_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")]; + tensor var_2075_split_sizes_0 = const()[name = string("op_2075_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2075_axis_0 = const()[name = string("op_2075_axis_0"), val = int32(-1)]; + tensor var_2075_0, tensor var_2075_1 = split(axis = var_2075_axis_0, split_sizes = var_2075_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2075")]; + tensor q_45 = mul(x = var_2075_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_45")]; + tensor var_2082 = const()[name = string("op_2082"), val = tensor([1, 3, 8, 256])]; + tensor var_2083 = reshape(shape = var_2082, x = q_45)[name = string("op_2083")]; + tensor var_2088 = const()[name = string("op_2088"), val = tensor([0, 2, 1, 3])]; + tensor q_47 = transpose(perm = var_2088, x = 
var_2083)[name = string("transpose_58")]; + tensor var_2090_cast_fp16 = mul(x = q_47, y = cos_s)[name = string("op_2090_cast_fp16")]; + tensor var_2091_split_sizes_0 = const()[name = string("op_2091_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2091_axis_0 = const()[name = string("op_2091_axis_0"), val = int32(-1)]; + tensor var_2091_0, tensor var_2091_1 = split(axis = var_2091_axis_0, split_sizes = var_2091_split_sizes_0, x = q_47)[name = string("op_2091")]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor var_2093 = mul(x = var_2091_1, y = const_34_promoted)[name = string("op_2093")]; + int32 var_2095 = const()[name = string("op_2095"), val = int32(-1)]; + bool var_2096_interleave_0 = const()[name = string("op_2096_interleave_0"), val = bool(false)]; + tensor var_2096 = concat(axis = var_2095, interleave = var_2096_interleave_0, values = (var_2093, var_2091_0))[name = string("op_2096")]; + tensor var_2097_cast_fp16 = mul(x = var_2096, y = sin_s)[name = string("op_2097_cast_fp16")]; + tensor q_49_cast_fp16 = add(x = var_2090_cast_fp16, y = var_2097_cast_fp16)[name = string("q_49_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_49_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = 
reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")]; + tensor var_2129 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2129")]; + tensor var_2135 = exp(x = var_2129)[name = string("op_2135")]; + tensor var_2145_axes_0 = const()[name = string("op_2145_axes_0"), val = tensor([-1])]; + bool var_2145_keep_dims_0 = const()[name = string("op_2145_keep_dims_0"), val = bool(true)]; + tensor var_2145 = reduce_sum(axes = var_2145_axes_0, keep_dims = var_2145_keep_dims_0, x = var_2135)[name = string("op_2145")]; + tensor var_2151_cast_fp16 = real_div(x = var_2135, y = var_2145)[name = string("op_2151_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_2151_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2162 = const()[name = string("op_2162"), val = tensor([0, 2, 1, 3])]; + tensor var_2169 = const()[name = string("op_2169"), val = tensor([1, 3, -1])]; + tensor var_2163_cast_fp16 = transpose(perm = var_2162, x = attn_output_25_cast_fp16)[name = string("transpose_57")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_2169, x = var_2163_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_2174 = const()[name = string("op_2174"), val = tensor([0, 2, 1])]; + string var_2190_pad_type_0 = const()[name = string("op_2190_pad_type_0"), val = string("valid")]; + int32 var_2190_groups_0 = const()[name = string("op_2190_groups_0"), val = int32(1)]; + tensor var_2190_strides_0 = const()[name = string("op_2190_strides_0"), val = tensor([1])]; + tensor var_2190_pad_0 = const()[name = string("op_2190_pad_0"), val = tensor([0, 0])]; 
+ tensor var_2190_dilations_0 = const()[name = string("op_2190_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395248320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397869824))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2175_cast_fp16 = transpose(perm = var_2174, x = attn_output_27_cast_fp16)[name = string("transpose_56")]; + tensor var_2190_cast_fp16 = conv(dilations = var_2190_dilations_0, groups = var_2190_groups_0, pad = var_2190_pad_0, pad_type = var_2190_pad_type_0, strides = var_2190_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2175_cast_fp16)[name = string("op_2190_cast_fp16")]; + tensor var_2194 = const()[name = string("op_2194"), val = tensor([0, 2, 1])]; + int32 var_2200 = const()[name = string("op_2200"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2194, x = var_2190_cast_fp16)[name = string("transpose_55")]; + tensor var_2202_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2202_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_2200, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2202_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2197_to_fp16 = const()[name = string("op_2197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2197_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor 
var_2207_split_sizes_0 = const()[name = string("op_2207_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2207_axis_0 = const()[name = string("op_2207_axis_0"), val = int32(-1)]; + tensor var_2207_cast_fp16_0, tensor var_2207_cast_fp16_1 = split(axis = var_2207_axis_0, split_sizes = var_2207_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2207_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397872448)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_2207_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2216 = const()[name = string("op_2216"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2218_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_2216, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2218_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2213_to_fp16 = const()[name = string("op_2213_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2213_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_2223_split_sizes_0 = const()[name = string("op_2223_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2223_axis_0 = const()[name 
= string("op_2223_axis_0"), val = int32(-1)]; + tensor var_2223_cast_fp16_0, tensor var_2223_cast_fp16_1 = split(axis = var_2223_axis_0, split_sizes = var_2223_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2223_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397877632)))]; + tensor h_27_cast_fp16 = mul(x = var_2223_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_2234 = const()[name = string("op_2234"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_2235 = transpose(perm = var_2234, x = h_27_cast_fp16)[name = string("transpose_54")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_2235)[name = string("input_109")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val 
= tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")]; + tensor var_2275_axes_0 = const()[name = string("op_2275_axes_0"), val = tensor([2])]; + tensor var_2275 = squeeze(axes = var_2275_axes_0, x = mlp_out_9)[name = string("op_2275")]; + tensor var_2279 = const()[name = string("op_2279"), val = tensor([0, 2, 1])]; + int32 var_2285 = const()[name = string("op_2285"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2279, x = var_2275)[name = string("transpose_53")]; + tensor 
var_2287 = mul(x = x_75, y = const_37_promoted)[name = string("op_2287")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_2285, interleave = input_113_interleave_0, values = (x_75, var_2287))[name = string("input_113")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_2282_to_fp16 = const()[name = string("op_2282_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2282_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")]; + tensor var_2292_split_sizes_0 = const()[name = string("op_2292_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2292_axis_0 = const()[name = string("op_2292_axis_0"), val = int32(-1)]; + tensor var_2292_0, tensor var_2292_1 = split(axis = var_2292_axis_0, split_sizes = var_2292_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2292")]; + tensor hidden_states_43 = mul(x = var_2292_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 7168])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 3, 7424])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_2320 = const()[name = string("op_2320"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = 
string("input_115_axes_0"), val = tensor([2])]; + tensor var_2321 = transpose(perm = var_2320, x = hidden_states_45_cast_fp16)[name = string("transpose_52")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_2321)[name = string("input_115")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_2340 = const()[name = string("op_2340"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_2341_cast_fp16 = transpose(perm = var_2340, x = per_layer_slice_9_cast_fp16)[name = string("transpose_51")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2341_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = 
string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397882816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398210560))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_2357_axes_0 = const()[name = string("op_2357_axes_0"), val = tensor([2])]; + tensor var_2357_cast_fp16 = squeeze(axes = var_2357_axes_0, x = gated_29_cast_fp16)[name = string("op_2357_cast_fp16")]; + tensor var_2361 = const()[name = string("op_2361"), val = tensor([0, 2, 1])]; + int32 var_2367 = const()[name = string("op_2367"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_2361, x = var_2357_cast_fp16)[name = string("transpose_50")]; + tensor var_2369_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2369_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_2367, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2369_cast_fp16))[name = 
string("input_119_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_2364_to_fp16 = const()[name = string("op_2364_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2364_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_2374_split_sizes_0 = const()[name = string("op_2374_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2374_axis_0 = const()[name = string("op_2374_axis_0"), val = int32(-1)]; + tensor var_2374_cast_fp16_0, tensor var_2374_cast_fp16_1 = split(axis = var_2374_axis_0, split_sizes = var_2374_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2374_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398213184)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_2374_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor([0x1.3ap-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_2389 = const()[name = string("op_2389"), val = int32(-1)]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2391_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2391_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = 
bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_2389, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2391_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_2386_to_fp16 = const()[name = string("op_2386_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2386_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor var_2396_split_sizes_0 = const()[name = string("op_2396_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2396_axis_0 = const()[name = string("op_2396_axis_0"), val = int32(-1)]; + tensor var_2396_cast_fp16_0, tensor var_2396_cast_fp16_1 = split(axis = var_2396_axis_0, split_sizes = var_2396_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2396_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398218368)))]; + tensor h_31_cast_fp16 = mul(x = var_2396_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_2402 = const()[name = string("op_2402"), val = tensor([0, 2, 1])]; + tensor var_2405_axes_0 = const()[name = string("op_2405_axes_0"), val = tensor([2])]; + tensor var_2403_cast_fp16 = transpose(perm = var_2402, x = h_31_cast_fp16)[name = string("transpose_49")]; + tensor var_2405_cast_fp16 = expand_dims(axes = var_2405_axes_0, x = var_2403_cast_fp16)[name = string("op_2405_cast_fp16")]; + string q_51_pad_type_0 = const()[name = string("q_51_pad_type_0"), val = string("valid")]; + tensor q_51_strides_0 = const()[name = string("q_51_strides_0"), val = tensor([1, 1])]; + tensor q_51_pad_0 = const()[name = string("q_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
q_51_dilations_0 = const()[name = string("q_51_dilations_0"), val = tensor([1, 1])]; + int32 q_51_groups_0 = const()[name = string("q_51_groups_0"), val = int32(1)]; + tensor q_51 = conv(dilations = q_51_dilations_0, groups = q_51_groups_0, pad = q_51_pad_0, pad_type = q_51_pad_type_0, strides = q_51_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2405_cast_fp16)[name = string("q_51")]; + tensor var_2426 = const()[name = string("op_2426"), val = tensor([1, 8, 512, 3])]; + tensor var_2427 = reshape(shape = var_2426, x = q_51)[name = string("op_2427")]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2450 = const()[name = string("op_2450"), val = tensor([3, 8, 512])]; + tensor transpose_46 = transpose(perm = transpose_46_perm_0, x = var_2427)[name = string("transpose_48")]; + tensor x_81 = reshape(shape = var_2450, x = transpose_46)[name = string("x_81")]; + int32 var_2456 = const()[name = string("op_2456"), val = int32(-1)]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2458 = mul(x = x_81, y = const_41_promoted)[name = string("op_2458")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_2456, interleave = input_125_interleave_0, values = (x_81, var_2458))[name = string("input_125")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_2453_to_fp16 = const()[name = string("op_2453_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2453_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")]; + tensor var_2463_split_sizes_0 = const()[name = string("op_2463_split_sizes_0"), val = tensor([512, 512])]; + int32 var_2463_axis_0 = const()[name = string("op_2463_axis_0"), val = int32(-1)]; + tensor 
var_2463_0, tensor var_2463_1 = split(axis = var_2463_axis_0, split_sizes = var_2463_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2463")]; + tensor q_55 = mul(x = var_2463_0, y = layers_5_self_attn_q_norm_weight)[name = string("q_55")]; + tensor var_2470 = const()[name = string("op_2470"), val = tensor([1, 3, 8, 512])]; + tensor var_2471 = reshape(shape = var_2470, x = q_55)[name = string("op_2471")]; + tensor var_2476 = const()[name = string("op_2476"), val = tensor([0, 2, 1, 3])]; + tensor q_57 = transpose(perm = var_2476, x = var_2471)[name = string("transpose_47")]; + tensor var_2478_cast_fp16 = mul(x = q_57, y = cos_f)[name = string("op_2478_cast_fp16")]; + tensor var_2479_split_sizes_0 = const()[name = string("op_2479_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2479_axis_0 = const()[name = string("op_2479_axis_0"), val = int32(-1)]; + tensor var_2479_0, tensor var_2479_1 = split(axis = var_2479_axis_0, split_sizes = var_2479_split_sizes_0, x = q_57)[name = string("op_2479")]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2481 = mul(x = var_2479_1, y = const_42_promoted)[name = string("op_2481")]; + int32 var_2483 = const()[name = string("op_2483"), val = int32(-1)]; + bool var_2484_interleave_0 = const()[name = string("op_2484_interleave_0"), val = bool(false)]; + tensor var_2484 = concat(axis = var_2483, interleave = var_2484_interleave_0, values = (var_2481, var_2479_0))[name = string("op_2484")]; + tensor var_2485_cast_fp16 = mul(x = var_2484, y = sin_f)[name = string("op_2485_cast_fp16")]; + tensor q_59_cast_fp16 = add(x = var_2478_cast_fp16, y = var_2485_cast_fp16)[name = string("q_59_cast_fp16")]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_10_reps_0 = const()[name = string("tile_10_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_20_cast_fp16 = transpose(perm = transpose_20_perm_0, x 
= kv14_k)[name = string("transpose_46")]; + tensor tile_10_cast_fp16 = tile(reps = tile_10_reps_0, x = transpose_20_cast_fp16)[name = string("tile_10_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_20, x = tile_10_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_45")]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_21, x = transpose_21_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_11_reps_0 = const()[name = string("tile_11_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_22_cast_fp16 = transpose(perm = transpose_22_perm_0, x = kv14_v)[name = string("transpose_44")]; + tensor tile_11_cast_fp16 = tile(reps = tile_11_reps_0, x = transpose_22_cast_fp16)[name = string("tile_11_cast_fp16")]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_22, x = tile_11_cast_fp16)[name = string("reshape_22_cast_fp16")]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_23_cast_fp16 = transpose(perm = transpose_23_perm_0, x = reshape_22_cast_fp16)[name = string("transpose_43")]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_23, x = 
transpose_23_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor V_expanded_11_perm_0 = const()[name = string("V_expanded_11_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor transpose_47_cast_fp16 = transpose(perm = transpose_47_perm_0, x = reshape_21_cast_fp16)[name = string("transpose_42")]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_59_cast_fp16, y = transpose_47_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_full)[name = string("x_83_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")]; + tensor var_2517 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2517")]; + tensor var_2523 = exp(x = var_2517)[name = string("op_2523")]; + tensor var_2533_axes_0 = const()[name = string("op_2533_axes_0"), val = tensor([-1])]; + bool var_2533_keep_dims_0 = const()[name = string("op_2533_keep_dims_0"), val = bool(true)]; + tensor var_2533 = reduce_sum(axes = var_2533_axes_0, keep_dims = var_2533_keep_dims_0, x = var_2523)[name = string("op_2533")]; + tensor var_2539_cast_fp16 = real_div(x = var_2523, y = var_2533)[name = string("op_2539_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = 
string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor V_expanded_11_cast_fp16 = transpose(perm = V_expanded_11_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_41")]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2539_cast_fp16, y = V_expanded_11_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_2550 = const()[name = string("op_2550"), val = tensor([0, 2, 1, 3])]; + tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 3, -1])]; + tensor var_2551_cast_fp16 = transpose(perm = var_2550, x = attn_output_31_cast_fp16)[name = string("transpose_40")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_2557, x = var_2551_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_2562 = const()[name = string("op_2562"), val = tensor([0, 2, 1])]; + string var_2578_pad_type_0 = const()[name = string("op_2578_pad_type_0"), val = string("valid")]; + int32 var_2578_groups_0 = const()[name = string("op_2578_groups_0"), val = int32(1)]; + tensor var_2578_strides_0 = const()[name = string("op_2578_strides_0"), val = tensor([1])]; + tensor var_2578_pad_0 = const()[name = string("op_2578_pad_0"), val = tensor([0, 0])]; + tensor var_2578_dilations_0 = const()[name = string("op_2578_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398223552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403466496))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2563_cast_fp16 = transpose(perm = var_2562, x = attn_output_33_cast_fp16)[name = string("transpose_39")]; + tensor var_2578_cast_fp16 = conv(dilations = var_2578_dilations_0, groups = var_2578_groups_0, pad = var_2578_pad_0, pad_type = 
var_2578_pad_type_0, strides = var_2578_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2563_cast_fp16)[name = string("op_2578_cast_fp16")]; + tensor var_2582 = const()[name = string("op_2582"), val = tensor([0, 2, 1])]; + int32 var_2588 = const()[name = string("op_2588"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_87_cast_fp16 = transpose(perm = var_2582, x = var_2578_cast_fp16)[name = string("transpose_38")]; + tensor var_2590_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2590_cast_fp16")]; + bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129_cast_fp16 = concat(axis = var_2588, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2590_cast_fp16))[name = string("input_129_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2585_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_2595_split_sizes_0 = const()[name = string("op_2595_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2595_axis_0 = const()[name = string("op_2595_axis_0"), val = int32(-1)]; + tensor var_2595_cast_fp16_0, tensor var_2595_cast_fp16_1 = split(axis = var_2595_axis_0, split_sizes = var_2595_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2595_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403469120)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_2595_cast_fp16_0, y = 
layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_2604 = const()[name = string("op_2604"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2606_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2606_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_2604, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2606_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2601_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_2611_split_sizes_0 = const()[name = string("op_2611_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2611_axis_0 = const()[name = string("op_2611_axis_0"), val = int32(-1)]; + tensor var_2611_cast_fp16_0, tensor var_2611_cast_fp16_1 = split(axis = var_2611_axis_0, split_sizes = var_2611_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2611_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403474304)))]; + tensor h_33_cast_fp16 = mul(x = var_2611_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_2622 = const()[name = string("op_2622"), 
val = tensor([0, 2, 1])]; + tensor input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor([2])]; + tensor var_2623 = transpose(perm = var_2622, x = h_33_cast_fp16)[name = string("transpose_37")]; + tensor input_133 = expand_dims(axes = input_133_axes_0, x = var_2623)[name = string("input_133")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_135 = mul(x = gate_23, y = up_11)[name = 
string("input_135")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")]; + tensor var_2663_axes_0 = const()[name = string("op_2663_axes_0"), val = tensor([2])]; + tensor var_2663 = squeeze(axes = var_2663_axes_0, x = mlp_out_11)[name = string("op_2663")]; + tensor var_2667 = const()[name = string("op_2667"), val = tensor([0, 2, 1])]; + int32 var_2673 = const()[name = string("op_2673"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_91 = transpose(perm = var_2667, x = var_2663)[name = string("transpose_36")]; + tensor var_2675 = mul(x = x_91, y = const_45_promoted)[name = string("op_2675")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137 = concat(axis = var_2673, interleave = input_137_interleave_0, values = (x_91, var_2675))[name = string("input_137")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_2670_to_fp16 = const()[name = string("op_2670_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2670_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")]; + tensor 
var_2680_split_sizes_0 = const()[name = string("op_2680_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2680_axis_0 = const()[name = string("op_2680_axis_0"), val = int32(-1)]; + tensor var_2680_0, tensor var_2680_1 = split(axis = var_2680_axis_0, split_sizes = var_2680_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2680")]; + tensor hidden_states_53 = mul(x = var_2680_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 7424])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 3, 7680])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_2708 = const()[name = string("op_2708"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_2709 = transpose(perm = var_2708, x = hidden_states_55_cast_fp16)[name = string("transpose_35")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_2709)[name = string("input_139")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + 
int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_2728 = const()[name = string("op_2728"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_2729_cast_fp16 = transpose(perm = var_2728, x = per_layer_slice_11_cast_fp16)[name = string("transpose_34")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2729_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403479488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403807232))))[name = 
string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_2745_axes_0 = const()[name = string("op_2745_axes_0"), val = tensor([2])]; + tensor var_2745_cast_fp16 = squeeze(axes = var_2745_axes_0, x = gated_35_cast_fp16)[name = string("op_2745_cast_fp16")]; + tensor var_2749 = const()[name = string("op_2749"), val = tensor([0, 2, 1])]; + int32 var_2755 = const()[name = string("op_2755"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_93_cast_fp16 = transpose(perm = var_2749, x = var_2745_cast_fp16)[name = string("transpose_33")]; + tensor var_2757_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2757_cast_fp16")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143_cast_fp16 = concat(axis = var_2755, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2757_cast_fp16))[name = string("input_143_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_2752_to_fp16 = const()[name = string("op_2752_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2752_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_2762_split_sizes_0 = const()[name = string("op_2762_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2762_axis_0 = const()[name = string("op_2762_axis_0"), val = int32(-1)]; + tensor var_2762_cast_fp16_0, tensor var_2762_cast_fp16_1 = split(axis = 
var_2762_axis_0, split_sizes = var_2762_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2762_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403809856)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_2762_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.aep-2])]; + tensor x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")]; + int32 var_2777 = const()[name = string("op_2777"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2779_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2779_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_2777, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2779_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_2774_to_fp16 = const()[name = string("op_2774_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2774_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_2784_split_sizes_0 = const()[name = string("op_2784_split_sizes_0"), val = tensor([2560, 2560])]; + int32 
var_2784_axis_0 = const()[name = string("op_2784_axis_0"), val = int32(-1)]; + tensor var_2784_cast_fp16_0, tensor var_2784_cast_fp16_1 = split(axis = var_2784_axis_0, split_sizes = var_2784_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2784_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403815040)))]; + tensor h_37_cast_fp16 = mul(x = var_2784_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_2790 = const()[name = string("op_2790"), val = tensor([0, 2, 1])]; + tensor var_2793_axes_0 = const()[name = string("op_2793_axes_0"), val = tensor([2])]; + tensor var_2791_cast_fp16 = transpose(perm = var_2790, x = h_37_cast_fp16)[name = string("transpose_32")]; + tensor var_2793_cast_fp16 = expand_dims(axes = var_2793_axes_0, x = var_2791_cast_fp16)[name = string("op_2793_cast_fp16")]; + string q_61_pad_type_0 = const()[name = string("q_61_pad_type_0"), val = string("valid")]; + tensor q_61_strides_0 = const()[name = string("q_61_strides_0"), val = tensor([1, 1])]; + tensor q_61_pad_0 = const()[name = string("q_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_61_dilations_0 = const()[name = string("q_61_dilations_0"), val = tensor([1, 1])]; + int32 q_61_groups_0 = const()[name = string("q_61_groups_0"), val = int32(1)]; + tensor q_61 = conv(dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2793_cast_fp16)[name = string("q_61")]; + tensor var_2814 = const()[name = string("op_2814"), val = tensor([1, 8, 256, 3])]; + tensor var_2815 = reshape(shape = var_2814, x = q_61)[name = string("op_2815")]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = 
tensor([0, 3, 1, 2])]; + tensor var_2838 = const()[name = string("op_2838"), val = tensor([3, 8, 256])]; + tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = var_2815)[name = string("transpose_31")]; + tensor x_97 = reshape(shape = var_2838, x = transpose_48)[name = string("x_97")]; + int32 var_2844 = const()[name = string("op_2844"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_2846 = mul(x = x_97, y = const_49_promoted)[name = string("op_2846")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_2844, interleave = input_149_interleave_0, values = (x_97, var_2846))[name = string("input_149")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_2841_to_fp16 = const()[name = string("op_2841_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2841_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")]; + tensor var_2851_split_sizes_0 = const()[name = string("op_2851_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2851_axis_0 = const()[name = string("op_2851_axis_0"), val = int32(-1)]; + tensor var_2851_0, tensor var_2851_1 = split(axis = var_2851_axis_0, split_sizes = var_2851_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2851")]; + tensor q_65 = mul(x = var_2851_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_65")]; + tensor var_2858 = const()[name = string("op_2858"), val = tensor([1, 3, 8, 256])]; + tensor var_2859 = reshape(shape = var_2858, x = q_65)[name = string("op_2859")]; + tensor var_2864 = const()[name = string("op_2864"), val = tensor([0, 2, 1, 3])]; + tensor q_67 = transpose(perm = var_2864, x = var_2859)[name = string("transpose_30")]; + tensor var_2866_cast_fp16 = mul(x = q_67, y = cos_s)[name = 
string("op_2866_cast_fp16")]; + tensor var_2867_split_sizes_0 = const()[name = string("op_2867_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2867_axis_0 = const()[name = string("op_2867_axis_0"), val = int32(-1)]; + tensor var_2867_0, tensor var_2867_1 = split(axis = var_2867_axis_0, split_sizes = var_2867_split_sizes_0, x = q_67)[name = string("op_2867")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_2869 = mul(x = var_2867_1, y = const_50_promoted)[name = string("op_2869")]; + int32 var_2871 = const()[name = string("op_2871"), val = int32(-1)]; + bool var_2872_interleave_0 = const()[name = string("op_2872_interleave_0"), val = bool(false)]; + tensor var_2872 = concat(axis = var_2871, interleave = var_2872_interleave_0, values = (var_2869, var_2867_0))[name = string("op_2872")]; + tensor var_2873_cast_fp16 = mul(x = var_2872, y = sin_s)[name = string("op_2873_cast_fp16")]; + tensor q_69_cast_fp16 = add(x = var_2866_cast_fp16, y = var_2873_cast_fp16)[name = string("q_69_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_69_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = 
string("reduce_max_6")]; + tensor var_2905 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2905")]; + tensor var_2911 = exp(x = var_2905)[name = string("op_2911")]; + tensor var_2921_axes_0 = const()[name = string("op_2921_axes_0"), val = tensor([-1])]; + bool var_2921_keep_dims_0 = const()[name = string("op_2921_keep_dims_0"), val = bool(true)]; + tensor var_2921 = reduce_sum(axes = var_2921_axes_0, keep_dims = var_2921_keep_dims_0, x = var_2911)[name = string("op_2921")]; + tensor var_2927_cast_fp16 = real_div(x = var_2911, y = var_2921)[name = string("op_2927_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_2927_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_2938 = const()[name = string("op_2938"), val = tensor([0, 2, 1, 3])]; + tensor var_2945 = const()[name = string("op_2945"), val = tensor([1, 3, -1])]; + tensor var_2939_cast_fp16 = transpose(perm = var_2938, x = attn_output_37_cast_fp16)[name = string("transpose_29")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_2945, x = var_2939_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_2950 = const()[name = string("op_2950"), val = tensor([0, 2, 1])]; + string var_2966_pad_type_0 = const()[name = string("op_2966_pad_type_0"), val = string("valid")]; + int32 var_2966_groups_0 = const()[name = string("op_2966_groups_0"), val = int32(1)]; + tensor var_2966_strides_0 = const()[name = string("op_2966_strides_0"), val = tensor([1])]; + tensor var_2966_pad_0 = const()[name = string("op_2966_pad_0"), val = tensor([0, 0])]; + tensor var_2966_dilations_0 = const()[name = string("op_2966_dilations_0"), val = 
tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403820224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406441728))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2951_cast_fp16 = transpose(perm = var_2950, x = attn_output_39_cast_fp16)[name = string("transpose_28")]; + tensor var_2966_cast_fp16 = conv(dilations = var_2966_dilations_0, groups = var_2966_groups_0, pad = var_2966_pad_0, pad_type = var_2966_pad_type_0, strides = var_2966_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2951_cast_fp16)[name = string("op_2966_cast_fp16")]; + tensor var_2970 = const()[name = string("op_2970"), val = tensor([0, 2, 1])]; + int32 var_2976 = const()[name = string("op_2976"), val = int32(-1)]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_103_cast_fp16 = transpose(perm = var_2970, x = var_2966_cast_fp16)[name = string("transpose_27")]; + tensor var_2978_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2978_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_2976, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2978_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_2973_to_fp16 = const()[name = string("op_2973_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_2973_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_2983_split_sizes_0 = const()[name = string("op_2983_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_2983_axis_0 = const()[name = string("op_2983_axis_0"), val = int32(-1)]; + tensor var_2983_cast_fp16_0, tensor var_2983_cast_fp16_1 = split(axis = var_2983_axis_0, split_sizes = var_2983_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2983_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406444352)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_2983_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")]; + int32 var_2992 = const()[name = string("op_2992"), val = int32(-1)]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2994_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2994_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_2992, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2994_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_2989_to_fp16 = const()[name = string("op_2989_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2989_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_2999_split_sizes_0 = const()[name = string("op_2999_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2999_axis_0 = const()[name = string("op_2999_axis_0"), val = int32(-1)]; + tensor 
var_2999_cast_fp16_0, tensor var_2999_cast_fp16_1 = split(axis = var_2999_axis_0, split_sizes = var_2999_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_2999_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406449536)))]; + tensor h_39_cast_fp16 = mul(x = var_2999_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_3010 = const()[name = string("op_3010"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_3011 = transpose(perm = var_3010, x = h_39_cast_fp16)[name = string("transpose_26")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_3011)[name = string("input_157")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")]; + tensor var_3051_axes_0 = const()[name = string("op_3051_axes_0"), val = tensor([2])]; + tensor var_3051 = squeeze(axes = var_3051_axes_0, x = mlp_out_13)[name = string("op_3051")]; + tensor var_3055 = const()[name = string("op_3055"), val = tensor([0, 2, 1])]; + int32 var_3061 = const()[name = string("op_3061"), val = int32(-1)]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor x_107 = transpose(perm = var_3055, x = var_3051)[name = string("transpose_25")]; + tensor 
var_3063 = mul(x = x_107, y = const_53_promoted)[name = string("op_3063")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_3061, interleave = input_161_interleave_0, values = (x_107, var_3063))[name = string("input_161")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_3058_to_fp16 = const()[name = string("op_3058_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3058_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")]; + tensor var_3068_split_sizes_0 = const()[name = string("op_3068_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3068_axis_0 = const()[name = string("op_3068_axis_0"), val = int32(-1)]; + tensor var_3068_0, tensor var_3068_1 = split(axis = var_3068_axis_0, split_sizes = var_3068_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3068")]; + tensor hidden_states_63 = mul(x = var_3068_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 7680])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 3, 7936])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_3096 = const()[name = string("op_3096"), val = tensor([0, 2, 1])]; + tensor input_163_axes_0 = 
const()[name = string("input_163_axes_0"), val = tensor([2])]; + tensor var_3097 = transpose(perm = var_3096, x = hidden_states_65_cast_fp16)[name = string("transpose_24")]; + tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_3097)[name = string("input_163")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_3116 = const()[name = string("op_3116"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_3117_cast_fp16 = transpose(perm = var_3116, x = per_layer_slice_13_cast_fp16)[name = string("transpose_23")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3117_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = 
string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406454720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406782464))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_3133_axes_0 = const()[name = string("op_3133_axes_0"), val = tensor([2])]; + tensor var_3133_cast_fp16 = squeeze(axes = var_3133_axes_0, x = gated_41_cast_fp16)[name = string("op_3133_cast_fp16")]; + tensor var_3137 = const()[name = string("op_3137"), val = tensor([0, 2, 1])]; + int32 var_3143 = const()[name = string("op_3143"), val = int32(-1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_109_cast_fp16 = transpose(perm = var_3137, x = var_3133_cast_fp16)[name = string("transpose_22")]; + tensor var_3145_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3145_cast_fp16")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_3143, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3145_cast_fp16))[name = 
string("input_167_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3140_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_3150_split_sizes_0 = const()[name = string("op_3150_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3150_axis_0 = const()[name = string("op_3150_axis_0"), val = int32(-1)]; + tensor var_3150_cast_fp16_0, tensor var_3150_cast_fp16_1 = split(axis = var_3150_axis_0, split_sizes = var_3150_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3150_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406785088)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_3150_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor([0x1.6cp-1])]; + tensor x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")]; + int32 var_3165 = const()[name = string("op_3165"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3167_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3167_cast_fp16")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = 
bool(false)]; + tensor input_169_cast_fp16 = concat(axis = var_3165, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3167_cast_fp16))[name = string("input_169_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_3162_to_fp16 = const()[name = string("op_3162_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3162_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_3172_split_sizes_0 = const()[name = string("op_3172_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3172_axis_0 = const()[name = string("op_3172_axis_0"), val = int32(-1)]; + tensor var_3172_cast_fp16_0, tensor var_3172_cast_fp16_1 = split(axis = var_3172_axis_0, split_sizes = var_3172_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3172_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406790272)))]; + tensor h_43_cast_fp16 = mul(x = var_3172_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_3178 = const()[name = string("op_3178"), val = tensor([0, 2, 1])]; + tensor var_3181_axes_0 = const()[name = string("op_3181_axes_0"), val = tensor([2])]; + tensor var_3179_cast_fp16 = transpose(perm = var_3178, x = h_43_cast_fp16)[name = string("transpose_21")]; + tensor var_3181_cast_fp16 = expand_dims(axes = var_3181_axes_0, x = var_3179_cast_fp16)[name = string("op_3181_cast_fp16")]; + string q_71_pad_type_0 = const()[name = string("q_71_pad_type_0"), val = string("valid")]; + tensor q_71_strides_0 = const()[name = string("q_71_strides_0"), val = tensor([1, 1])]; + tensor q_71_pad_0 = const()[name = string("q_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
q_71_dilations_0 = const()[name = string("q_71_dilations_0"), val = tensor([1, 1])]; + int32 q_71_groups_0 = const()[name = string("q_71_groups_0"), val = int32(1)]; + tensor q_71 = conv(dilations = q_71_dilations_0, groups = q_71_groups_0, pad = q_71_pad_0, pad_type = q_71_pad_type_0, strides = q_71_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3181_cast_fp16)[name = string("q_71")]; + tensor var_3202 = const()[name = string("op_3202"), val = tensor([1, 8, 256, 3])]; + tensor var_3203 = reshape(shape = var_3202, x = q_71)[name = string("op_3203")]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3226 = const()[name = string("op_3226"), val = tensor([3, 8, 256])]; + tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = var_3203)[name = string("transpose_20")]; + tensor x_113 = reshape(shape = var_3226, x = transpose_50)[name = string("x_113")]; + int32 var_3232 = const()[name = string("op_3232"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor var_3234 = mul(x = x_113, y = const_57_promoted)[name = string("op_3234")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_3232, interleave = input_173_interleave_0, values = (x_113, var_3234))[name = string("input_173")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_3229_to_fp16 = const()[name = string("op_3229_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3229_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")]; + tensor var_3239_split_sizes_0 = const()[name = string("op_3239_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3239_axis_0 = const()[name = string("op_3239_axis_0"), val = int32(-1)]; + tensor 
var_3239_0, tensor var_3239_1 = split(axis = var_3239_axis_0, split_sizes = var_3239_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3239")]; + tensor q_75 = mul(x = var_3239_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_75")]; + tensor var_3246 = const()[name = string("op_3246"), val = tensor([1, 3, 8, 256])]; + tensor var_3247 = reshape(shape = var_3246, x = q_75)[name = string("op_3247")]; + tensor var_3252 = const()[name = string("op_3252"), val = tensor([0, 2, 1, 3])]; + tensor q_77 = transpose(perm = var_3252, x = var_3247)[name = string("transpose_19")]; + tensor var_3254_cast_fp16 = mul(x = q_77, y = cos_s)[name = string("op_3254_cast_fp16")]; + tensor var_3255_split_sizes_0 = const()[name = string("op_3255_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3255_axis_0 = const()[name = string("op_3255_axis_0"), val = int32(-1)]; + tensor var_3255_0, tensor var_3255_1 = split(axis = var_3255_axis_0, split_sizes = var_3255_split_sizes_0, x = q_77)[name = string("op_3255")]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor var_3257 = mul(x = var_3255_1, y = const_58_promoted)[name = string("op_3257")]; + int32 var_3259 = const()[name = string("op_3259"), val = int32(-1)]; + bool var_3260_interleave_0 = const()[name = string("op_3260_interleave_0"), val = bool(false)]; + tensor var_3260 = concat(axis = var_3259, interleave = var_3260_interleave_0, values = (var_3257, var_3255_0))[name = string("op_3260")]; + tensor var_3261_cast_fp16 = mul(x = var_3260, y = sin_s)[name = string("op_3261_cast_fp16")]; + tensor q_79_cast_fp16 = add(x = var_3254_cast_fp16, y = var_3261_cast_fp16)[name = string("q_79_cast_fp16")]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor attn_weights_29_cast_fp16 = 
matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_79_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")]; + tensor var_3293 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3293")]; + tensor var_3299 = exp(x = var_3293)[name = string("op_3299")]; + tensor var_3309_axes_0 = const()[name = string("op_3309_axes_0"), val = tensor([-1])]; + bool var_3309_keep_dims_0 = const()[name = string("op_3309_keep_dims_0"), val = bool(true)]; + tensor var_3309 = reduce_sum(axes = var_3309_axes_0, keep_dims = var_3309_keep_dims_0, x = var_3299)[name = string("op_3309")]; + tensor var_3315_cast_fp16 = real_div(x = var_3299, y = var_3309)[name = string("op_3315_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3315_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_3326 = const()[name = string("op_3326"), val = tensor([0, 2, 1, 3])]; + tensor var_3333 = const()[name = string("op_3333"), val = tensor([1, 3, -1])]; + tensor var_3327_cast_fp16 = transpose(perm = var_3326, x = attn_output_43_cast_fp16)[name = string("transpose_18")]; + tensor 
attn_output_45_cast_fp16 = reshape(shape = var_3333, x = var_3327_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3338 = const()[name = string("op_3338"), val = tensor([0, 2, 1])]; + string var_3354_pad_type_0 = const()[name = string("op_3354_pad_type_0"), val = string("valid")]; + int32 var_3354_groups_0 = const()[name = string("op_3354_groups_0"), val = int32(1)]; + tensor var_3354_strides_0 = const()[name = string("op_3354_strides_0"), val = tensor([1])]; + tensor var_3354_pad_0 = const()[name = string("op_3354_pad_0"), val = tensor([0, 0])]; + tensor var_3354_dilations_0 = const()[name = string("op_3354_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406795456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409416960))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3339_cast_fp16 = transpose(perm = var_3338, x = attn_output_45_cast_fp16)[name = string("transpose_17")]; + tensor var_3354_cast_fp16 = conv(dilations = var_3354_dilations_0, groups = var_3354_groups_0, pad = var_3354_pad_0, pad_type = var_3354_pad_type_0, strides = var_3354_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3339_cast_fp16)[name = string("op_3354_cast_fp16")]; + tensor var_3358 = const()[name = string("op_3358"), val = tensor([0, 2, 1])]; + int32 var_3364 = const()[name = string("op_3364"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_119_cast_fp16 = transpose(perm = var_3358, x = var_3354_cast_fp16)[name = string("transpose_16")]; + tensor var_3366_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3366_cast_fp16")]; + bool input_177_interleave_0 = const()[name = 
string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_3364, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3366_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_3361_to_fp16 = const()[name = string("op_3361_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3361_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor var_3371_split_sizes_0 = const()[name = string("op_3371_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3371_axis_0 = const()[name = string("op_3371_axis_0"), val = int32(-1)]; + tensor var_3371_cast_fp16_0, tensor var_3371_cast_fp16_1 = split(axis = var_3371_axis_0, split_sizes = var_3371_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3371_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409419584)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_3371_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")]; + int32 var_3380 = const()[name = string("op_3380"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3382_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3382_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_3380, interleave = 
input_179_interleave_0, values = (x_121_cast_fp16, var_3382_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_3377_to_fp16 = const()[name = string("op_3377_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3377_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_3387_split_sizes_0 = const()[name = string("op_3387_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3387_axis_0 = const()[name = string("op_3387_axis_0"), val = int32(-1)]; + tensor var_3387_cast_fp16_0, tensor var_3387_cast_fp16_1 = split(axis = var_3387_axis_0, split_sizes = var_3387_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3387_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409424768)))]; + tensor h_45_cast_fp16 = mul(x = var_3387_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_3398 = const()[name = string("op_3398"), val = tensor([0, 2, 1])]; + tensor input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor([2])]; + tensor var_3399 = transpose(perm = var_3398, x = h_45_cast_fp16)[name = string("transpose_15")]; + tensor input_181 = expand_dims(axes = input_181_axes_0, x = var_3399)[name = string("input_181")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val 
= tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = 
mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")]; + tensor var_3439_axes_0 = const()[name = string("op_3439_axes_0"), val = tensor([2])]; + tensor var_3439 = squeeze(axes = var_3439_axes_0, x = mlp_out_15)[name = string("op_3439")]; + tensor var_3443 = const()[name = string("op_3443"), val = tensor([0, 2, 1])]; + int32 var_3449 = const()[name = string("op_3449"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor x_123 = transpose(perm = var_3443, x = var_3439)[name = string("transpose_14")]; + tensor var_3451 = mul(x = x_123, y = const_61_promoted)[name = string("op_3451")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_3449, interleave = input_185_interleave_0, values = (x_123, var_3451))[name = string("input_185")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3446_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")]; + tensor var_3456_split_sizes_0 = const()[name = string("op_3456_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3456_axis_0 = const()[name = string("op_3456_axis_0"), val = int32(-1)]; + tensor var_3456_0, tensor var_3456_1 = split(axis = var_3456_axis_0, split_sizes = var_3456_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3456")]; + tensor hidden_states_73 = mul(x = var_3456_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = 
string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 7936])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 3, 8192])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_3484 = const()[name = string("op_3484"), val = tensor([0, 2, 1])]; + tensor input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor([2])]; + tensor var_3485 = transpose(perm = var_3484, x = hidden_states_75_cast_fp16)[name = string("transpose_13")]; + tensor input_187 = expand_dims(axes = input_187_axes_0, x = var_3485)[name = string("input_187")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_3504 = const()[name = string("op_3504"), val = tensor([0, 2, 1])]; + tensor 
per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_3505_cast_fp16 = transpose(perm = var_3504, x = per_layer_slice_15_cast_fp16)[name = string("transpose_12")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3505_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409429952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409757696))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_3521_axes_0 = const()[name = string("op_3521_axes_0"), val = tensor([2])]; + tensor var_3521_cast_fp16 = squeeze(axes = var_3521_axes_0, x = gated_47_cast_fp16)[name = string("op_3521_cast_fp16")]; + tensor var_3525 = const()[name = string("op_3525"), val = 
tensor([0, 2, 1])]; + int32 var_3531 = const()[name = string("op_3531"), val = int32(-1)]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_125_cast_fp16 = transpose(perm = var_3525, x = var_3521_cast_fp16)[name = string("transpose_11")]; + tensor var_3533_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3533_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_3531, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3533_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_3528_to_fp16 = const()[name = string("op_3528_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3528_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_3538_split_sizes_0 = const()[name = string("op_3538_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3538_axis_0 = const()[name = string("op_3538_axis_0"), val = int32(-1)]; + tensor var_3538_cast_fp16_0, tensor var_3538_cast_fp16_1 = split(axis = var_3538_axis_0, split_sizes = var_3538_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3538_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409760320)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_3538_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = 
string("hidden_states_81_cast_fp16")]; + tensor const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor([0x1.a2p-1])]; + tensor x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")]; + int32 var_3553 = const()[name = string("op_3553"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3555_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3555_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_3553, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3555_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_3550_to_fp16 = const()[name = string("op_3550_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3550_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_3560_split_sizes_0 = const()[name = string("op_3560_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3560_axis_0 = const()[name = string("op_3560_axis_0"), val = int32(-1)]; + tensor var_3560_cast_fp16_0, tensor var_3560_cast_fp16_1 = split(axis = var_3560_axis_0, split_sizes = var_3560_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3560_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409765504)))]; + tensor h_49_cast_fp16 = mul(x = var_3560_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor 
var_3566 = const()[name = string("op_3566"), val = tensor([0, 2, 1])]; + tensor var_3569_axes_0 = const()[name = string("op_3569_axes_0"), val = tensor([2])]; + tensor var_3567_cast_fp16 = transpose(perm = var_3566, x = h_49_cast_fp16)[name = string("transpose_10")]; + tensor var_3569_cast_fp16 = expand_dims(axes = var_3569_axes_0, x = var_3567_cast_fp16)[name = string("op_3569_cast_fp16")]; + string q_81_pad_type_0 = const()[name = string("q_81_pad_type_0"), val = string("valid")]; + tensor q_81_strides_0 = const()[name = string("q_81_strides_0"), val = tensor([1, 1])]; + tensor q_81_pad_0 = const()[name = string("q_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_81_dilations_0 = const()[name = string("q_81_dilations_0"), val = tensor([1, 1])]; + int32 q_81_groups_0 = const()[name = string("q_81_groups_0"), val = int32(1)]; + tensor q_81 = conv(dilations = q_81_dilations_0, groups = q_81_groups_0, pad = q_81_pad_0, pad_type = q_81_pad_type_0, strides = q_81_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3569_cast_fp16)[name = string("q_81")]; + tensor var_3590 = const()[name = string("op_3590"), val = tensor([1, 8, 256, 3])]; + tensor var_3591 = reshape(shape = var_3590, x = q_81)[name = string("op_3591")]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3614 = const()[name = string("op_3614"), val = tensor([3, 8, 256])]; + tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = var_3591)[name = string("transpose_9")]; + tensor x_129 = reshape(shape = var_3614, x = transpose_52)[name = string("x_129")]; + int32 var_3620 = const()[name = string("op_3620"), val = int32(-1)]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_3622 = mul(x = x_129, y = const_65_promoted)[name = string("op_3622")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + tensor 
input_197 = concat(axis = var_3620, interleave = input_197_interleave_0, values = (x_129, var_3622))[name = string("input_197")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_3617_to_fp16 = const()[name = string("op_3617_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3617_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")]; + tensor var_3627_split_sizes_0 = const()[name = string("op_3627_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3627_axis_0 = const()[name = string("op_3627_axis_0"), val = int32(-1)]; + tensor var_3627_0, tensor var_3627_1 = split(axis = var_3627_axis_0, split_sizes = var_3627_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3627")]; + tensor q_85 = mul(x = var_3627_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_85")]; + tensor var_3634 = const()[name = string("op_3634"), val = tensor([1, 3, 8, 256])]; + tensor var_3635 = reshape(shape = var_3634, x = q_85)[name = string("op_3635")]; + tensor var_3640 = const()[name = string("op_3640"), val = tensor([0, 2, 1, 3])]; + tensor q_87 = transpose(perm = var_3640, x = var_3635)[name = string("transpose_8")]; + tensor var_3642_cast_fp16 = mul(x = q_87, y = cos_s)[name = string("op_3642_cast_fp16")]; + tensor var_3643_split_sizes_0 = const()[name = string("op_3643_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3643_axis_0 = const()[name = string("op_3643_axis_0"), val = int32(-1)]; + tensor var_3643_0, tensor var_3643_1 = split(axis = var_3643_axis_0, split_sizes = var_3643_split_sizes_0, x = q_87)[name = string("op_3643")]; + fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)]; + tensor var_3645 = mul(x = var_3643_1, y = const_66_promoted)[name = string("op_3645")]; + int32 var_3647 = const()[name = string("op_3647"), val = int32(-1)]; + bool var_3648_interleave_0 = const()[name = 
string("op_3648_interleave_0"), val = bool(false)]; + tensor var_3648 = concat(axis = var_3647, interleave = var_3648_interleave_0, values = (var_3645, var_3643_0))[name = string("op_3648")]; + tensor var_3649_cast_fp16 = mul(x = var_3648, y = sin_s)[name = string("op_3649_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_3642_cast_fp16, y = var_3649_cast_fp16)[name = string("q_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_sliding)[name = string("x_131_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")]; + tensor var_3681 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3681")]; + tensor var_3687 = exp(x = var_3681)[name = string("op_3687")]; + tensor var_3697_axes_0 = const()[name = string("op_3697_axes_0"), val = tensor([-1])]; + bool var_3697_keep_dims_0 = const()[name = string("op_3697_keep_dims_0"), val = bool(true)]; + tensor var_3697 = reduce_sum(axes = var_3697_axes_0, keep_dims = var_3697_keep_dims_0, x = var_3687)[name = string("op_3697")]; + tensor var_3703_cast_fp16 = real_div(x = var_3687, y = var_3697)[name = string("op_3703_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = 
bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_3703_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_3714 = const()[name = string("op_3714"), val = tensor([0, 2, 1, 3])]; + tensor var_3721 = const()[name = string("op_3721"), val = tensor([1, 3, -1])]; + tensor var_3715_cast_fp16 = transpose(perm = var_3714, x = attn_output_49_cast_fp16)[name = string("transpose_7")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_3721, x = var_3715_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_3726 = const()[name = string("op_3726"), val = tensor([0, 2, 1])]; + string var_3742_pad_type_0 = const()[name = string("op_3742_pad_type_0"), val = string("valid")]; + int32 var_3742_groups_0 = const()[name = string("op_3742_groups_0"), val = int32(1)]; + tensor var_3742_strides_0 = const()[name = string("op_3742_strides_0"), val = tensor([1])]; + tensor var_3742_pad_0 = const()[name = string("op_3742_pad_0"), val = tensor([0, 0])]; + tensor var_3742_dilations_0 = const()[name = string("op_3742_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409770688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412392192))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3727_cast_fp16 = transpose(perm = var_3726, x = attn_output_51_cast_fp16)[name = string("transpose_6")]; + tensor var_3742_cast_fp16 = conv(dilations = var_3742_dilations_0, groups = var_3742_groups_0, pad = var_3742_pad_0, pad_type = var_3742_pad_type_0, strides = var_3742_strides_0, weight = 
squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3727_cast_fp16)[name = string("op_3742_cast_fp16")]; + tensor var_3746 = const()[name = string("op_3746"), val = tensor([0, 2, 1])]; + int32 var_3752 = const()[name = string("op_3752"), val = int32(-1)]; + fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_135_cast_fp16 = transpose(perm = var_3746, x = var_3742_cast_fp16)[name = string("transpose_5")]; + tensor var_3754_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3754_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_3752, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3754_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_3749_to_fp16 = const()[name = string("op_3749_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3749_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_3759_split_sizes_0 = const()[name = string("op_3759_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3759_axis_0 = const()[name = string("op_3759_axis_0"), val = int32(-1)]; + tensor var_3759_cast_fp16_0, tensor var_3759_cast_fp16_1 = split(axis = var_3759_axis_0, split_sizes = var_3759_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3759_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412394816)))]; + tensor attn_output_cast_fp16 = mul(x = var_3759_cast_fp16_0, y = 
layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_cast_fp16)[name = string("x_137_cast_fp16")]; + int32 var_3768 = const()[name = string("op_3768"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3770_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3770_cast_fp16")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203_cast_fp16 = concat(axis = var_3768, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3770_cast_fp16))[name = string("input_203_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_3765_to_fp16 = const()[name = string("op_3765_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3765_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor var_3775_split_sizes_0 = const()[name = string("op_3775_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3775_axis_0 = const()[name = string("op_3775_axis_0"), val = int32(-1)]; + tensor var_3775_cast_fp16_0, tensor var_3775_cast_fp16_1 = split(axis = var_3775_axis_0, split_sizes = var_3775_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3775_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412400000)))]; + tensor h_51_cast_fp16 = mul(x = var_3775_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_3786 = const()[name = string("op_3786"), 
val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_3787 = transpose(perm = var_3786, x = h_51_cast_fp16)[name = string("transpose_4")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3787)[name = string("input_205")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_33)[name = string("gate")]; + tensor input_207 = mul(x = gate, y = up)[name = string("input_207")]; + string mlp_out_pad_type_0 = const()[name = 
string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out")]; + tensor var_3827_axes_0 = const()[name = string("op_3827_axes_0"), val = tensor([2])]; + tensor var_3827 = squeeze(axes = var_3827_axes_0, x = mlp_out)[name = string("op_3827")]; + tensor var_3831 = const()[name = string("op_3831"), val = tensor([0, 2, 1])]; + int32 var_3837 = const()[name = string("op_3837"), val = int32(-1)]; + fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)]; + tensor x_139 = transpose(perm = var_3831, x = var_3827)[name = string("transpose_3")]; + tensor var_3839 = mul(x = x_139, y = const_69_promoted)[name = string("op_3839")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209 = concat(axis = var_3837, interleave = input_209_interleave_0, values = (x_139, var_3839))[name = string("input_209")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_3834_to_fp16 = const()[name = string("op_3834_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3834_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")]; + tensor var_3844_split_sizes_0 = const()[name = string("op_3844_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3844_axis_0 
= const()[name = string("op_3844_axis_0"), val = int32(-1)]; + tensor var_3844_0, tensor var_3844_1 = split(axis = var_3844_axis_0, split_sizes = var_3844_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3844")]; + tensor hidden_states_83 = mul(x = var_3844_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 8192])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 3, 8448])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_3872 = const()[name = string("op_3872"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_3873 = transpose(perm = var_3872, x = hidden_states_85_cast_fp16)[name = string("transpose_2")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_3873)[name = string("input_211")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = 
gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_3892 = const()[name = string("op_3892"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_3893_cast_fp16 = transpose(perm = var_3892, x = per_layer_slice_cast_fp16)[name = string("transpose_1")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_3893_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_cast_fp16)[name = string("input_213_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412405184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412732928))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = 
gated_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_3909_axes_0 = const()[name = string("op_3909_axes_0"), val = tensor([2])]; + tensor var_3909_cast_fp16 = squeeze(axes = var_3909_axes_0, x = gated_cast_fp16)[name = string("op_3909_cast_fp16")]; + tensor var_3913 = const()[name = string("op_3913"), val = tensor([0, 2, 1])]; + int32 var_3919 = const()[name = string("op_3919"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_cast_fp16 = transpose(perm = var_3913, x = var_3909_cast_fp16)[name = string("transpose_0")]; + tensor var_3921_cast_fp16 = mul(x = x_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3921_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_3919, interleave = input_interleave_0, values = (x_cast_fp16, var_3921_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_3916_to_fp16 = const()[name = string("op_3916_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3916_to_fp16, x = input_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_3926_split_sizes_0 = const()[name = string("op_3926_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3926_axis_0 = const()[name = string("op_3926_axis_0"), val = int32(-1)]; + tensor var_3926_cast_fp16_0, tensor var_3926_cast_fp16_1 = split(axis = var_3926_axis_0, split_sizes = var_3926_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3926_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412735552)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_3926_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor([0x1.b4p-1])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_71_promoted_to_fp16)[name = string("op_3936_cast_fp16")]; + } -> (hidden_states_out); +} \ No newline at end of file diff --git a/chunk3.mlmodelc/weights/weight.bin b/chunk3.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c7703f8d2419ab345266f1a991aa5e4c487542f --- /dev/null +++ b/chunk3.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db45f9ce7443de57765ba412a0158ac2ad46a9d2f735fba9376bbdb0aa357b88 +size 412740736 diff --git a/chunk3_3way.mlmodelc/analytics/coremldata.bin b/chunk3_3way.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..de6ca6443658b6dc5c97fa73bb4701adf65c9858 --- /dev/null +++ b/chunk3_3way.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83149a33c6c49a2607a6e038d88d42ea6829bd7ae99bbc67bbba085b983cff48 +size 243 diff --git a/chunk3_3way.mlmodelc/coremldata.bin b/chunk3_3way.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa08a79a1986ac94ae90668c5d079531bf3964be --- /dev/null +++ b/chunk3_3way.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346fab1c61528cc8fccc0b5f0d65bb8943b6ebafb0f983a1d6cb1361047195d5 +size 780 diff --git 
a/chunk3_3way.mlmodelc/metadata.json b/chunk3_3way.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f8e093f28fed01a3e21f1463cb42d267e9b7e8bd --- /dev/null +++ b/chunk3_3way.mlmodelc/metadata.json @@ -0,0 +1,224 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "token_id", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "token_logit", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 2560)", + "shortDescription" : "", + "shape" : "[1, 1, 2560]", + "name" : "hidden_states_out", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.expandDims" : 37, + "Ios18.mul" : 166, + "Ios18.matmul" : 18, + "Identity" : 1, + "Ios18.exp" : 9, + "Ios18.realDiv" : 9, + "Split" : 64, + "Ios18.gatherAlongAxis" : 1, + "Ios16.reduceMax" : 9, + "Tile" : 4, + "Ios16.reduceSum" : 9, + "Ios18.add" : 45, + "Ios18.layerNorm" : 55, + "Ios18.reduceArgmax" : 1, + "Ios18.reshape" : 44, + "Ios18.constexprLutToDense" : 64, + "Ios18.conv" : 64, + "Ios18.gelu" : 18, + "Ios18.concat" : 64, + "Ios18.sub" : 9, + "Ios18.transpose" : 104, + "Ios18.tanh" : 1, + "Ios18.squeeze" : 20, + "Ios18.sliceByIndex" : 9 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + 
"tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.conversion_date" : "2026-04-30", + "com.github.apple.coremltools.source" : "torch==2.11.0", + "com.github.apple.coremltools.version" : "9.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 2560)", + "shortDescription" : "", + "shape" : "[1, 1, 2560]", + "name" : "hidden_states", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 2048]", + "name" : "causal_mask_full", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 512]", + "name" : "causal_mask_sliding", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 1)", + "shortDescription" : "", + "shape" : "[1, 1, 2048, 1]", + "name" : "update_mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 10752)", + "shortDescription" : "", + "shape" : "[1, 1, 10752]", + "name" : "per_layer_combined", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 256]", + "name" : "cos_s", + "type" : "MultiArray" + }, 
+ { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 256]", + "name" : "sin_s", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 512]", + "name" : "cos_f", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 1, 512]", + "name" : "sin_f", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)", + "shortDescription" : "", + "shape" : "[1, 2, 512, 256]", + "name" : "kv13_k", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)", + "shortDescription" : "", + "shape" : "[1, 2, 512, 256]", + "name" : "kv13_v", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[1, 2, 2048, 512]", + "name" : "kv14_k", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)", + "shortDescription" : "", + "shape" : "[1, 2, 2048, 512]", + "name" : "kv14_v", + "type" : "MultiArray" + } + ], + "generatedClassName" : "chunk3_3way", + "method" : "predict" + } +] \ No newline at end of file diff --git a/chunk3_3way.mlmodelc/model.mil b/chunk3_3way.mlmodelc/model.mil new file mode 100644 index 
0000000000000000000000000000000000000000..40c2489f08b885fb9c2fbb442f2c20ab58a077d4 --- /dev/null +++ b/chunk3_3way.mlmodelc/model.mil @@ -0,0 +1,1971 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func main(tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor kv13_k, tensor kv13_v, tensor kv14_k, tensor kv14_v, tensor per_layer_combined, tensor sin_f, tensor sin_s, tensor update_mask) { + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15731520))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28849088))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28859392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(41966656))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41969280)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41974464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302208))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44924032))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44926144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58033408))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58043712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71150976))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84268544))))[name = 
string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84271168)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84276352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604096))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89847360))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89851520)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89852608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102959872))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102970176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116077440))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(116087744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129195008))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129197632)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129202816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129530560))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129530880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132152384))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132154496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145261760))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145272064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158379328))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(158389632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171496896))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171499520)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171504704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171832448))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174454272))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174456384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187563648))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187573952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200681216))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200691520))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(213798784))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213801408)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213806592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214134336))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214134656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216756160))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216758272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229865536))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229875840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242983104))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242993408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(256100672))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256103296)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256108480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436224))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259058048))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259060160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272167424))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272177728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285284992))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298402560))))[name = 
string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298405184)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738112))))[name = string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301359936))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314469312))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314479616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327586880))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327597184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340704448))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; 
+ tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340707072)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340712256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040000))))[name = string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346283264))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346287424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359394688))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359404992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372512256))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372522560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385629824))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = 
const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385632448)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385637632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385965376))))[name = string("layers_8_per_layer_input_gate_weight_palettized")]; + int32 var_452 = const()[name = string("op_452"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_454_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_454_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_452, interleave = input_1_interleave_0, values = (hidden_states, var_454_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_449_to_fp16 = const()[name = string("op_449_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_449_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_459_split_sizes_0 = const()[name = string("op_459_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_459_axis_0 = const()[name = string("op_459_axis_0"), val = int32(-1)]; + tensor var_459_cast_fp16_0, tensor var_459_cast_fp16_1 = split(axis = var_459_axis_0, split_sizes = var_459_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_459_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(385965696)))]; + tensor h_1_cast_fp16 = mul(x = var_459_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_465 = const()[name = string("op_465"), val = tensor([0, 2, 1])]; + tensor var_468_axes_0 = const()[name = string("op_468_axes_0"), val = tensor([2])]; + tensor var_466_cast_fp16 = transpose(perm = var_465, x = h_1_cast_fp16)[name = string("transpose_103")]; + tensor var_468_cast_fp16 = expand_dims(axes = var_468_axes_0, x = var_466_cast_fp16)[name = string("op_468_cast_fp16")]; + string var_484_pad_type_0 = const()[name = string("op_484_pad_type_0"), val = string("valid")]; + tensor var_484_strides_0 = const()[name = string("op_484_strides_0"), val = tensor([1, 1])]; + tensor var_484_pad_0 = const()[name = string("op_484_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_484_dilations_0 = const()[name = string("op_484_dilations_0"), val = tensor([1, 1])]; + int32 var_484_groups_0 = const()[name = string("op_484_groups_0"), val = int32(1)]; + tensor var_484 = conv(dilations = var_484_dilations_0, groups = var_484_groups_0, pad = var_484_pad_0, pad_type = var_484_pad_type_0, strides = var_484_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_468_cast_fp16)[name = string("op_484")]; + tensor var_489 = const()[name = string("op_489"), val = tensor([1, 8, 256, 1])]; + tensor var_490 = reshape(shape = var_489, x = var_484)[name = string("op_490")]; + tensor var_495 = const()[name = string("op_495"), val = tensor([0, 1, 3, 2])]; + tensor var_505 = const()[name = string("op_505"), val = tensor([1, 8, 256])]; + tensor var_496 = transpose(perm = var_495, x = var_490)[name = string("transpose_102")]; + tensor x_1 = reshape(shape = var_505, x = var_496)[name = string("x_1")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = 
fp16(-0x1p+0)]; + tensor var_513 = mul(x = x_1, y = const_1_promoted)[name = string("op_513")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_511, interleave = input_5_interleave_0, values = (x_1, var_513))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_508_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_518_split_sizes_0 = const()[name = string("op_518_split_sizes_0"), val = tensor([256, 256])]; + int32 var_518_axis_0 = const()[name = string("op_518_axis_0"), val = int32(-1)]; + tensor var_518_0, tensor var_518_1 = split(axis = var_518_axis_0, split_sizes = var_518_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_518")]; + tensor var_520 = mul(x = var_518_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_520")]; + tensor var_525 = const()[name = string("op_525"), val = tensor([1, 8, 1, 256])]; + tensor q_3 = reshape(shape = var_525, x = var_520)[name = string("q_3")]; + tensor var_527_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_527_cast_fp16")]; + tensor var_528_split_sizes_0 = const()[name = string("op_528_split_sizes_0"), val = tensor([128, 128])]; + int32 var_528_axis_0 = const()[name = string("op_528_axis_0"), val = int32(-1)]; + tensor var_528_0, tensor var_528_1 = split(axis = var_528_axis_0, split_sizes = var_528_split_sizes_0, x = q_3)[name = string("op_528")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_530 = mul(x = var_528_1, y = const_2_promoted)[name = string("op_530")]; + int32 var_532 = const()[name = string("op_532"), val = int32(-1)]; + bool var_533_interleave_0 = const()[name = string("op_533_interleave_0"), val = 
bool(false)]; + tensor var_533 = concat(axis = var_532, interleave = var_533_interleave_0, values = (var_530, var_528_0))[name = string("op_533")]; + tensor var_534_cast_fp16 = mul(x = var_533, y = sin_s)[name = string("op_534_cast_fp16")]; + tensor q_5_cast_fp16 = add(x = var_527_cast_fp16, y = var_534_cast_fp16)[name = string("q_5_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_101")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_100")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_99")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = 
const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_98")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_97")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_5_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_3_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")]; + tensor var_566 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_566")]; + tensor var_572 = exp(x = var_566)[name = string("op_572")]; + tensor var_582_axes_0 = const()[name = 
string("op_582_axes_0"), val = tensor([-1])]; + bool var_582_keep_dims_0 = const()[name = string("op_582_keep_dims_0"), val = bool(true)]; + tensor var_582 = reduce_sum(axes = var_582_axes_0, keep_dims = var_582_keep_dims_0, x = var_572)[name = string("op_582")]; + tensor var_588_cast_fp16 = real_div(x = var_572, y = var_582)[name = string("op_588_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_96")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_588_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([0, 2, 1, 3])]; + tensor var_606 = const()[name = string("op_606"), val = tensor([1, 1, -1])]; + tensor var_600_cast_fp16 = transpose(perm = var_599, x = attn_output_1_cast_fp16)[name = string("transpose_95")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_606, x = var_600_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_611 = const()[name = string("op_611"), val = tensor([0, 2, 1])]; + string var_627_pad_type_0 = const()[name = string("op_627_pad_type_0"), val = string("valid")]; + int32 var_627_groups_0 = const()[name = string("op_627_groups_0"), val = int32(1)]; + tensor var_627_strides_0 = const()[name = string("op_627_strides_0"), val = tensor([1])]; + tensor var_627_pad_0 = const()[name = string("op_627_pad_0"), val = tensor([0, 0])]; + tensor var_627_dilations_0 = const()[name = string("op_627_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(385970880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388592384))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_612_cast_fp16 = transpose(perm = var_611, x = attn_output_3_cast_fp16)[name = string("transpose_94")]; + tensor var_627_cast_fp16 = conv(dilations = var_627_dilations_0, groups = var_627_groups_0, pad = var_627_pad_0, pad_type = var_627_pad_type_0, strides = var_627_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_612_cast_fp16)[name = string("op_627_cast_fp16")]; + tensor var_631 = const()[name = string("op_631"), val = tensor([0, 2, 1])]; + int32 var_637 = const()[name = string("op_637"), val = int32(-1)]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_7_cast_fp16 = transpose(perm = var_631, x = var_627_cast_fp16)[name = string("transpose_93")]; + tensor var_639_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_639_cast_fp16")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9_cast_fp16 = concat(axis = var_637, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_639_cast_fp16))[name = string("input_9_cast_fp16")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_634_to_fp16 = const()[name = string("op_634_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_634_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")]; + tensor var_644_split_sizes_0 = const()[name = string("op_644_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_644_axis_0 = const()[name = string("op_644_axis_0"), val = int32(-1)]; + tensor var_644_cast_fp16_0, tensor var_644_cast_fp16_1 = split(axis = 
var_644_axis_0, split_sizes = var_644_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_644_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388595008)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_644_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")]; + int32 var_653 = const()[name = string("op_653"), val = int32(-1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_655_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_655_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_653, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_655_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_650_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_660_split_sizes_0 = const()[name = string("op_660_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_660_axis_0 = const()[name = string("op_660_axis_0"), val = int32(-1)]; + tensor var_660_cast_fp16_0, tensor var_660_cast_fp16_1 = split(axis = var_660_axis_0, split_sizes = var_660_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_660_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = 
const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388600192)))]; + tensor h_3_cast_fp16 = mul(x = var_660_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_671 = const()[name = string("op_671"), val = tensor([0, 2, 1])]; + tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_672 = transpose(perm = var_671, x = h_3_cast_fp16)[name = string("transpose_92")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_672)[name = string("input_13")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = 
up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")]; + string mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")]; + tensor var_712_axes_0 = const()[name = string("op_712_axes_0"), val = tensor([2])]; + tensor var_712 = squeeze(axes = var_712_axes_0, x = mlp_out_1)[name = string("op_712")]; + tensor var_716 = const()[name = string("op_716"), val = tensor([0, 2, 1])]; + int32 var_722 = const()[name = string("op_722"), val = int32(-1)]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor x_11 = transpose(perm = var_716, x = var_712)[name = string("transpose_91")]; + tensor var_724 = mul(x = x_11, y = const_5_promoted)[name = string("op_724")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17 = concat(axis = var_722, interleave = input_17_interleave_0, values = (x_11, var_724))[name = string("input_17")]; + tensor normed_17_axes_0 = const()[name = 
string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_719_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")]; + tensor var_729_split_sizes_0 = const()[name = string("op_729_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_729_axis_0 = const()[name = string("op_729_axis_0"), val = int32(-1)]; + tensor var_729_0, tensor var_729_1 = split(axis = var_729_axis_0, split_sizes = var_729_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_729")]; + tensor hidden_states_3 = mul(x = var_729_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 8448])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 8704])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_757 = const()[name = string("op_757"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_758 = transpose(perm = var_757, x = hidden_states_5_cast_fp16)[name = string("transpose_90")]; + tensor input_19 = expand_dims(axes = input_19_axes_0, x = var_758)[name = string("input_19")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = 
string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_777 = const()[name = string("op_777"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_778_cast_fp16 = transpose(perm = var_777, x = per_layer_slice_1_cast_fp16)[name = string("transpose_89")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_778_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388605376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388933120))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_794_axes_0 = const()[name = string("op_794_axes_0"), val = tensor([2])]; + tensor var_794_cast_fp16 = squeeze(axes = var_794_axes_0, x = gated_5_cast_fp16)[name = string("op_794_cast_fp16")]; + tensor var_798 = const()[name = string("op_798"), val = tensor([0, 2, 1])]; + int32 var_804 = const()[name = string("op_804"), val = int32(-1)]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_13_cast_fp16 = transpose(perm = var_798, x = var_794_cast_fp16)[name = string("transpose_88")]; + tensor var_806_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_806_cast_fp16")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23_cast_fp16 = concat(axis = var_804, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_806_cast_fp16))[name = string("input_23_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_801_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_811_split_sizes_0 = const()[name = string("op_811_split_sizes_0"), val = tensor([2560, 2560])]; + 
int32 var_811_axis_0 = const()[name = string("op_811_axis_0"), val = int32(-1)]; + tensor var_811_cast_fp16_0, tensor var_811_cast_fp16_1 = split(axis = var_811_axis_0, split_sizes = var_811_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_811_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388935744)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_811_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor([0x1.a6p-1])]; + tensor x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")]; + int32 var_826 = const()[name = string("op_826"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_828_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_828_cast_fp16")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25_cast_fp16 = concat(axis = var_826, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_828_cast_fp16))[name = string("input_25_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_823_to_fp16 = const()[name = string("op_823_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_823_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")]; + 
tensor var_833_split_sizes_0 = const()[name = string("op_833_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_833_axis_0 = const()[name = string("op_833_axis_0"), val = int32(-1)]; + tensor var_833_cast_fp16_0, tensor var_833_cast_fp16_1 = split(axis = var_833_axis_0, split_sizes = var_833_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_833_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388940928)))]; + tensor h_7_cast_fp16 = mul(x = var_833_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_839 = const()[name = string("op_839"), val = tensor([0, 2, 1])]; + tensor var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor([2])]; + tensor var_840_cast_fp16 = transpose(perm = var_839, x = h_7_cast_fp16)[name = string("transpose_87")]; + tensor var_842_cast_fp16 = expand_dims(axes = var_842_axes_0, x = var_840_cast_fp16)[name = string("op_842_cast_fp16")]; + string var_858_pad_type_0 = const()[name = string("op_858_pad_type_0"), val = string("valid")]; + tensor var_858_strides_0 = const()[name = string("op_858_strides_0"), val = tensor([1, 1])]; + tensor var_858_pad_0 = const()[name = string("op_858_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_858_dilations_0 = const()[name = string("op_858_dilations_0"), val = tensor([1, 1])]; + int32 var_858_groups_0 = const()[name = string("op_858_groups_0"), val = int32(1)]; + tensor var_858 = conv(dilations = var_858_dilations_0, groups = var_858_groups_0, pad = var_858_pad_0, pad_type = var_858_pad_type_0, strides = var_858_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_842_cast_fp16)[name = string("op_858")]; + tensor var_863 = const()[name = string("op_863"), val = tensor([1, 8, 256, 1])]; + tensor var_864 = 
reshape(shape = var_863, x = var_858)[name = string("op_864")]; + tensor var_869 = const()[name = string("op_869"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 8, 256])]; + tensor var_870 = transpose(perm = var_869, x = var_864)[name = string("transpose_86")]; + tensor x_17 = reshape(shape = var_879, x = var_870)[name = string("x_17")]; + int32 var_885 = const()[name = string("op_885"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor var_887 = mul(x = x_17, y = const_9_promoted)[name = string("op_887")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29 = concat(axis = var_885, interleave = input_29_interleave_0, values = (x_17, var_887))[name = string("input_29")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_882_to_fp16 = const()[name = string("op_882_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_882_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")]; + tensor var_892_split_sizes_0 = const()[name = string("op_892_split_sizes_0"), val = tensor([256, 256])]; + int32 var_892_axis_0 = const()[name = string("op_892_axis_0"), val = int32(-1)]; + tensor var_892_0, tensor var_892_1 = split(axis = var_892_axis_0, split_sizes = var_892_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_892")]; + tensor var_894 = mul(x = var_892_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_894")]; + tensor var_899 = const()[name = string("op_899"), val = tensor([1, 8, 1, 256])]; + tensor q_9 = reshape(shape = var_899, x = var_894)[name = string("q_9")]; + tensor var_901_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_901_cast_fp16")]; + tensor var_902_split_sizes_0 = const()[name = string("op_902_split_sizes_0"), val = tensor([128, 128])]; + 
int32 var_902_axis_0 = const()[name = string("op_902_axis_0"), val = int32(-1)]; + tensor var_902_0, tensor var_902_1 = split(axis = var_902_axis_0, split_sizes = var_902_split_sizes_0, x = q_9)[name = string("op_902")]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor var_904 = mul(x = var_902_1, y = const_10_promoted)[name = string("op_904")]; + int32 var_906 = const()[name = string("op_906"), val = int32(-1)]; + bool var_907_interleave_0 = const()[name = string("op_907_interleave_0"), val = bool(false)]; + tensor var_907 = concat(axis = var_906, interleave = var_907_interleave_0, values = (var_904, var_902_0))[name = string("op_907")]; + tensor var_908_cast_fp16 = mul(x = var_907, y = sin_s)[name = string("op_908_cast_fp16")]; + tensor q_11_cast_fp16 = add(x = var_901_cast_fp16, y = var_908_cast_fp16)[name = string("q_11_cast_fp16")]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_11_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")]; + tensor var_940 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_940")]; + tensor var_946 = exp(x = var_940)[name = string("op_946")]; + tensor var_956_axes_0 
= const()[name = string("op_956_axes_0"), val = tensor([-1])]; + bool var_956_keep_dims_0 = const()[name = string("op_956_keep_dims_0"), val = bool(true)]; + tensor var_956 = reduce_sum(axes = var_956_axes_0, keep_dims = var_956_keep_dims_0, x = var_946)[name = string("op_956")]; + tensor var_962_cast_fp16 = real_div(x = var_946, y = var_956)[name = string("op_962_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_962_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_973 = const()[name = string("op_973"), val = tensor([0, 2, 1, 3])]; + tensor var_980 = const()[name = string("op_980"), val = tensor([1, 1, -1])]; + tensor var_974_cast_fp16 = transpose(perm = var_973, x = attn_output_7_cast_fp16)[name = string("transpose_85")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_980, x = var_974_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_985 = const()[name = string("op_985"), val = tensor([0, 2, 1])]; + string var_1001_pad_type_0 = const()[name = string("op_1001_pad_type_0"), val = string("valid")]; + int32 var_1001_groups_0 = const()[name = string("op_1001_groups_0"), val = int32(1)]; + tensor var_1001_strides_0 = const()[name = string("op_1001_strides_0"), val = tensor([1])]; + tensor var_1001_pad_0 = const()[name = string("op_1001_pad_0"), val = tensor([0, 0])]; + tensor var_1001_dilations_0 = const()[name = string("op_1001_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388946112))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(391567616))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_986_cast_fp16 = transpose(perm = var_985, x = attn_output_9_cast_fp16)[name = string("transpose_84")]; + tensor var_1001_cast_fp16 = conv(dilations = var_1001_dilations_0, groups = var_1001_groups_0, pad = var_1001_pad_0, pad_type = var_1001_pad_type_0, strides = var_1001_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_986_cast_fp16)[name = string("op_1001_cast_fp16")]; + tensor var_1005 = const()[name = string("op_1005"), val = tensor([0, 2, 1])]; + int32 var_1011 = const()[name = string("op_1011"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_23_cast_fp16 = transpose(perm = var_1005, x = var_1001_cast_fp16)[name = string("transpose_83")]; + tensor var_1013_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1013_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_1011, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1013_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1008_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1018_split_sizes_0 = const()[name = string("op_1018_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1018_axis_0 = const()[name = string("op_1018_axis_0"), val = int32(-1)]; + tensor var_1018_cast_fp16_0, tensor var_1018_cast_fp16_1 = split(axis = var_1018_axis_0, split_sizes = var_1018_split_sizes_0, x = 
normed_33_cast_fp16)[name = string("op_1018_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391570240)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1018_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_1027 = const()[name = string("op_1027"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1029_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1029_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1027, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1029_cast_fp16))[name = string("input_35_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1024_to_fp16 = const()[name = string("op_1024_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1024_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor var_1034_split_sizes_0 = const()[name = string("op_1034_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1034_axis_0 = const()[name = string("op_1034_axis_0"), val = int32(-1)]; + tensor var_1034_cast_fp16_0, tensor var_1034_cast_fp16_1 = split(axis = var_1034_axis_0, split_sizes = var_1034_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391575424)))]; + tensor h_9_cast_fp16 = mul(x = var_1034_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1045 = const()[name = string("op_1045"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_1046 = transpose(perm = var_1045, x = h_9_cast_fp16)[name = string("transpose_82")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1046)[name = string("input_37")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, 
weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")]; + tensor var_1086_axes_0 = const()[name = string("op_1086_axes_0"), val = tensor([2])]; + tensor var_1086 = squeeze(axes = var_1086_axes_0, x = mlp_out_3)[name = string("op_1086")]; + tensor var_1090 = const()[name = string("op_1090"), val = tensor([0, 2, 1])]; + int32 var_1096 = const()[name = string("op_1096"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor x_27 = transpose(perm = var_1090, x = var_1086)[name = string("transpose_81")]; + tensor var_1098 = mul(x = x_27, y = const_13_promoted)[name = string("op_1098")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1096, interleave = input_41_interleave_0, values = (x_27, var_1098))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = 
string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1093_to_fp16 = const()[name = string("op_1093_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1093_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1103_split_sizes_0 = const()[name = string("op_1103_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1103_axis_0 = const()[name = string("op_1103_axis_0"), val = int32(-1)]; + tensor var_1103_0, tensor var_1103_1 = split(axis = var_1103_axis_0, split_sizes = var_1103_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1103")]; + tensor hidden_states_13 = mul(x = var_1103_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 8704])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 8960])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1131 = const()[name = string("op_1131"), val = tensor([0, 2, 1])]; + tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; + tensor var_1132 = transpose(perm = var_1131, x = hidden_states_15_cast_fp16)[name = string("transpose_80")]; + tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1132)[name = string("input_43")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor 
gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1151 = const()[name = string("op_1151"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1152_cast_fp16 = transpose(perm = var_1151, x = per_layer_slice_3_cast_fp16)[name = string("transpose_79")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1152_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391580608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391908352))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1168_axes_0 = const()[name = string("op_1168_axes_0"), val = tensor([2])]; + tensor var_1168_cast_fp16 = squeeze(axes = var_1168_axes_0, x = gated_11_cast_fp16)[name = string("op_1168_cast_fp16")]; + tensor var_1172 = const()[name = string("op_1172"), val = tensor([0, 2, 1])]; + int32 var_1178 = const()[name = string("op_1178"), val = int32(-1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_29_cast_fp16 = transpose(perm = var_1172, x = var_1168_cast_fp16)[name = string("transpose_78")]; + tensor var_1180_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1180_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_1178, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1180_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1175_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1185_split_sizes_0 = const()[name = 
string("op_1185_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1185_axis_0 = const()[name = string("op_1185_axis_0"), val = int32(-1)]; + tensor var_1185_cast_fp16_0, tensor var_1185_cast_fp16_1 = split(axis = var_1185_axis_0, split_sizes = var_1185_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1185_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391910976)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1185_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor([0x1.acp-1])]; + tensor x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")]; + int32 var_1200 = const()[name = string("op_1200"), val = int32(-1)]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1202_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1202_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1200, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1202_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1197_to_fp16 = const()[name = string("op_1197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, 
epsilon = var_1197_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1207_split_sizes_0 = const()[name = string("op_1207_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1207_axis_0 = const()[name = string("op_1207_axis_0"), val = int32(-1)]; + tensor var_1207_cast_fp16_0, tensor var_1207_cast_fp16_1 = split(axis = var_1207_axis_0, split_sizes = var_1207_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1207_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391916160)))]; + tensor h_13_cast_fp16 = mul(x = var_1207_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1213 = const()[name = string("op_1213"), val = tensor([0, 2, 1])]; + tensor var_1216_axes_0 = const()[name = string("op_1216_axes_0"), val = tensor([2])]; + tensor var_1214_cast_fp16 = transpose(perm = var_1213, x = h_13_cast_fp16)[name = string("transpose_77")]; + tensor var_1216_cast_fp16 = expand_dims(axes = var_1216_axes_0, x = var_1214_cast_fp16)[name = string("op_1216_cast_fp16")]; + string var_1232_pad_type_0 = const()[name = string("op_1232_pad_type_0"), val = string("valid")]; + tensor var_1232_strides_0 = const()[name = string("op_1232_strides_0"), val = tensor([1, 1])]; + tensor var_1232_pad_0 = const()[name = string("op_1232_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1232_dilations_0 = const()[name = string("op_1232_dilations_0"), val = tensor([1, 1])]; + int32 var_1232_groups_0 = const()[name = string("op_1232_groups_0"), val = int32(1)]; + tensor var_1232 = conv(dilations = var_1232_dilations_0, groups = var_1232_groups_0, pad = var_1232_pad_0, pad_type = var_1232_pad_type_0, strides = var_1232_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1216_cast_fp16)[name = 
string("op_1232")]; + tensor var_1237 = const()[name = string("op_1237"), val = tensor([1, 8, 512, 1])]; + tensor var_1238 = reshape(shape = var_1237, x = var_1232)[name = string("op_1238")]; + tensor var_1243 = const()[name = string("op_1243"), val = tensor([0, 1, 3, 2])]; + tensor var_1253 = const()[name = string("op_1253"), val = tensor([1, 8, 512])]; + tensor var_1244 = transpose(perm = var_1243, x = var_1238)[name = string("transpose_76")]; + tensor x_33 = reshape(shape = var_1253, x = var_1244)[name = string("x_33")]; + int32 var_1259 = const()[name = string("op_1259"), val = int32(-1)]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1261 = mul(x = x_33, y = const_17_promoted)[name = string("op_1261")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1259, interleave = input_53_interleave_0, values = (x_33, var_1261))[name = string("input_53")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1256_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")]; + tensor var_1266_split_sizes_0 = const()[name = string("op_1266_split_sizes_0"), val = tensor([512, 512])]; + int32 var_1266_axis_0 = const()[name = string("op_1266_axis_0"), val = int32(-1)]; + tensor var_1266_0, tensor var_1266_1 = split(axis = var_1266_axis_0, split_sizes = var_1266_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1266")]; + tensor var_1268 = mul(x = var_1266_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_1268")]; + tensor var_1273 = const()[name = string("op_1273"), val = tensor([1, 8, 1, 512])]; + tensor q_15 = reshape(shape = var_1273, x = var_1268)[name = string("q_15")]; + tensor var_1275_cast_fp16 = 
mul(x = q_15, y = cos_f)[name = string("op_1275_cast_fp16")]; + tensor var_1276_split_sizes_0 = const()[name = string("op_1276_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1276_axis_0 = const()[name = string("op_1276_axis_0"), val = int32(-1)]; + tensor var_1276_0, tensor var_1276_1 = split(axis = var_1276_axis_0, split_sizes = var_1276_split_sizes_0, x = q_15)[name = string("op_1276")]; + fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; + tensor var_1278 = mul(x = var_1276_1, y = const_18_promoted)[name = string("op_1278")]; + int32 var_1280 = const()[name = string("op_1280"), val = int32(-1)]; + bool var_1281_interleave_0 = const()[name = string("op_1281_interleave_0"), val = bool(false)]; + tensor var_1281 = concat(axis = var_1280, interleave = var_1281_interleave_0, values = (var_1278, var_1276_0))[name = string("op_1281")]; + tensor var_1282_cast_fp16 = mul(x = var_1281, y = sin_f)[name = string("op_1282_cast_fp16")]; + tensor q_17_cast_fp16 = add(x = var_1275_cast_fp16, y = var_1282_cast_fp16)[name = string("q_17_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = kv14_k)[name = string("transpose_75")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_9_cast_fp16 = transpose(perm = 
transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_74")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = kv14_v)[name = string("transpose_73")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_72")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_71")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = 
attn_weights_9_transpose_y_0, x = q_17_cast_fp16, y = transpose_38_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_full)[name = string("x_35_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")]; + tensor var_1314 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1314")]; + tensor var_1320 = exp(x = var_1314)[name = string("op_1320")]; + tensor var_1330_axes_0 = const()[name = string("op_1330_axes_0"), val = tensor([-1])]; + bool var_1330_keep_dims_0 = const()[name = string("op_1330_keep_dims_0"), val = bool(true)]; + tensor var_1330 = reduce_sum(axes = var_1330_axes_0, keep_dims = var_1330_keep_dims_0, x = var_1320)[name = string("op_1330")]; + tensor var_1336_cast_fp16 = real_div(x = var_1320, y = var_1330)[name = string("op_1336_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_70")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1336_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_1347 = const()[name = string("op_1347"), val = tensor([0, 2, 1, 3])]; + tensor var_1354 = const()[name = string("op_1354"), val = tensor([1, 1, -1])]; + tensor var_1348_cast_fp16 = transpose(perm = var_1347, x = 
attn_output_13_cast_fp16)[name = string("transpose_69")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_1354, x = var_1348_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_1359 = const()[name = string("op_1359"), val = tensor([0, 2, 1])]; + string var_1375_pad_type_0 = const()[name = string("op_1375_pad_type_0"), val = string("valid")]; + int32 var_1375_groups_0 = const()[name = string("op_1375_groups_0"), val = int32(1)]; + tensor var_1375_strides_0 = const()[name = string("op_1375_strides_0"), val = tensor([1])]; + tensor var_1375_pad_0 = const()[name = string("op_1375_pad_0"), val = tensor([0, 0])]; + tensor var_1375_dilations_0 = const()[name = string("op_1375_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391921344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397164288))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1360_cast_fp16 = transpose(perm = var_1359, x = attn_output_15_cast_fp16)[name = string("transpose_68")]; + tensor var_1375_cast_fp16 = conv(dilations = var_1375_dilations_0, groups = var_1375_groups_0, pad = var_1375_pad_0, pad_type = var_1375_pad_type_0, strides = var_1375_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1360_cast_fp16)[name = string("op_1375_cast_fp16")]; + tensor var_1379 = const()[name = string("op_1379"), val = tensor([0, 2, 1])]; + int32 var_1385 = const()[name = string("op_1385"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_39_cast_fp16 = transpose(perm = var_1379, x = var_1375_cast_fp16)[name = string("transpose_67")]; + tensor var_1387_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1387_cast_fp16")]; 
+ bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57_cast_fp16 = concat(axis = var_1385, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1387_cast_fp16))[name = string("input_57_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1382_to_fp16 = const()[name = string("op_1382_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1382_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1392_split_sizes_0 = const()[name = string("op_1392_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1392_axis_0 = const()[name = string("op_1392_axis_0"), val = int32(-1)]; + tensor var_1392_cast_fp16_0, tensor var_1392_cast_fp16_1 = split(axis = var_1392_axis_0, split_sizes = var_1392_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1392_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397166912)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_1392_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")]; + int32 var_1401 = const()[name = string("op_1401"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1403_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1403_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_1401, 
interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1403_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1398_to_fp16 = const()[name = string("op_1398_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1398_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_1408_split_sizes_0 = const()[name = string("op_1408_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1408_axis_0 = const()[name = string("op_1408_axis_0"), val = int32(-1)]; + tensor var_1408_cast_fp16_0, tensor var_1408_cast_fp16_1 = split(axis = var_1408_axis_0, split_sizes = var_1408_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1408_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397172096)))]; + tensor h_15_cast_fp16 = mul(x = var_1408_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_1419 = const()[name = string("op_1419"), val = tensor([0, 2, 1])]; + tensor input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor([2])]; + tensor var_1420 = transpose(perm = var_1419, x = h_15_cast_fp16)[name = string("transpose_66")]; + tensor input_61 = expand_dims(axes = input_61_axes_0, x = var_1420)[name = string("input_61")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = 
tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = 
layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")]; + tensor var_1460_axes_0 = const()[name = string("op_1460_axes_0"), val = tensor([2])]; + tensor var_1460 = squeeze(axes = var_1460_axes_0, x = mlp_out_5)[name = string("op_1460")]; + tensor var_1464 = const()[name = string("op_1464"), val = tensor([0, 2, 1])]; + int32 var_1470 = const()[name = string("op_1470"), val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_43 = transpose(perm = var_1464, x = var_1460)[name = string("transpose_65")]; + tensor var_1472 = mul(x = x_43, y = const_21_promoted)[name = string("op_1472")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_1470, interleave = input_65_interleave_0, values = (x_43, var_1472))[name = string("input_65")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_1467_to_fp16 = const()[name = string("op_1467_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1467_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")]; + tensor var_1477_split_sizes_0 = const()[name = string("op_1477_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1477_axis_0 = const()[name = string("op_1477_axis_0"), val = int32(-1)]; + tensor var_1477_0, tensor var_1477_1 = split(axis = var_1477_axis_0, split_sizes = var_1477_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1477")]; + tensor hidden_states_23 = mul(x = var_1477_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 8960])]; + tensor 
per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 9216])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_1505 = const()[name = string("op_1505"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_1506 = transpose(perm = var_1505, x = hidden_states_25_cast_fp16)[name = string("transpose_64")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_1506)[name = string("input_67")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_1525 = const()[name = string("op_1525"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + 
tensor var_1526_cast_fp16 = transpose(perm = var_1525, x = per_layer_slice_5_cast_fp16)[name = string("transpose_63")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1526_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397177280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397505024))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_1542_axes_0 = const()[name = string("op_1542_axes_0"), val = tensor([2])]; + tensor var_1542_cast_fp16 = squeeze(axes = var_1542_axes_0, x = gated_17_cast_fp16)[name = string("op_1542_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([0, 2, 1])]; + int32 var_1552 = const()[name = string("op_1552"), val = int32(-1)]; + fp16 
const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_45_cast_fp16 = transpose(perm = var_1546, x = var_1542_cast_fp16)[name = string("transpose_62")]; + tensor var_1554_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1554_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_1552, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1554_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_1549_to_fp16 = const()[name = string("op_1549_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1549_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_1559_split_sizes_0 = const()[name = string("op_1559_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1559_axis_0 = const()[name = string("op_1559_axis_0"), val = int32(-1)]; + tensor var_1559_cast_fp16_0, tensor var_1559_cast_fp16_1 = split(axis = var_1559_axis_0, split_sizes = var_1559_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1559_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397507648)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_1559_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = 
string("const_23_promoted_to_fp16"), val = tensor([0x1.acp-1])]; + tensor x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")]; + int32 var_1574 = const()[name = string("op_1574"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1576_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1576_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_1574, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1576_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_1571_to_fp16 = const()[name = string("op_1571_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1571_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-1)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1581_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397512832)))]; + tensor h_19_cast_fp16 = mul(x = var_1581_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_1587 = const()[name = string("op_1587"), val = tensor([0, 2, 1])]; + tensor var_1590_axes_0 = const()[name = 
string("op_1590_axes_0"), val = tensor([2])]; + tensor var_1588_cast_fp16 = transpose(perm = var_1587, x = h_19_cast_fp16)[name = string("transpose_61")]; + tensor var_1590_cast_fp16 = expand_dims(axes = var_1590_axes_0, x = var_1588_cast_fp16)[name = string("op_1590_cast_fp16")]; + string var_1606_pad_type_0 = const()[name = string("op_1606_pad_type_0"), val = string("valid")]; + tensor var_1606_strides_0 = const()[name = string("op_1606_strides_0"), val = tensor([1, 1])]; + tensor var_1606_pad_0 = const()[name = string("op_1606_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1606_dilations_0 = const()[name = string("op_1606_dilations_0"), val = tensor([1, 1])]; + int32 var_1606_groups_0 = const()[name = string("op_1606_groups_0"), val = int32(1)]; + tensor var_1606 = conv(dilations = var_1606_dilations_0, groups = var_1606_groups_0, pad = var_1606_pad_0, pad_type = var_1606_pad_type_0, strides = var_1606_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1590_cast_fp16)[name = string("op_1606")]; + tensor var_1611 = const()[name = string("op_1611"), val = tensor([1, 8, 256, 1])]; + tensor var_1612 = reshape(shape = var_1611, x = var_1606)[name = string("op_1612")]; + tensor var_1617 = const()[name = string("op_1617"), val = tensor([0, 1, 3, 2])]; + tensor var_1627 = const()[name = string("op_1627"), val = tensor([1, 8, 256])]; + tensor var_1618 = transpose(perm = var_1617, x = var_1612)[name = string("transpose_60")]; + tensor x_49 = reshape(shape = var_1627, x = var_1618)[name = string("x_49")]; + int32 var_1633 = const()[name = string("op_1633"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_1635 = mul(x = x_49, y = const_25_promoted)[name = string("op_1635")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_1633, interleave = input_77_interleave_0, values = (x_49, 
var_1635))[name = string("input_77")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_1630_to_fp16 = const()[name = string("op_1630_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1630_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")]; + tensor var_1640_split_sizes_0 = const()[name = string("op_1640_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1640_axis_0 = const()[name = string("op_1640_axis_0"), val = int32(-1)]; + tensor var_1640_0, tensor var_1640_1 = split(axis = var_1640_axis_0, split_sizes = var_1640_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1640")]; + tensor var_1642 = mul(x = var_1640_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1642")]; + tensor var_1647 = const()[name = string("op_1647"), val = tensor([1, 8, 1, 256])]; + tensor q_21 = reshape(shape = var_1647, x = var_1642)[name = string("q_21")]; + tensor var_1649_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1649_cast_fp16")]; + tensor var_1650_split_sizes_0 = const()[name = string("op_1650_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1650_axis_0 = const()[name = string("op_1650_axis_0"), val = int32(-1)]; + tensor var_1650_0, tensor var_1650_1 = split(axis = var_1650_axis_0, split_sizes = var_1650_split_sizes_0, x = q_21)[name = string("op_1650")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_1652 = mul(x = var_1650_1, y = const_26_promoted)[name = string("op_1652")]; + int32 var_1654 = const()[name = string("op_1654"), val = int32(-1)]; + bool var_1655_interleave_0 = const()[name = string("op_1655_interleave_0"), val = bool(false)]; + tensor var_1655 = concat(axis = var_1654, interleave = var_1655_interleave_0, values = (var_1652, var_1650_0))[name = string("op_1655")]; + tensor var_1656_cast_fp16 = mul(x = var_1655, y = sin_s)[name = 
string("op_1656_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_1649_cast_fp16, y = var_1656_cast_fp16)[name = string("q_23_cast_fp16")]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_23_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")]; + tensor var_1688 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1688")]; + tensor var_1694 = exp(x = var_1688)[name = string("op_1694")]; + tensor var_1704_axes_0 = const()[name = string("op_1704_axes_0"), val = tensor([-1])]; + bool var_1704_keep_dims_0 = const()[name = string("op_1704_keep_dims_0"), val = bool(true)]; + tensor var_1704 = reduce_sum(axes = var_1704_axes_0, keep_dims = var_1704_keep_dims_0, x = var_1694)[name = string("op_1704")]; + tensor var_1710_cast_fp16 = real_div(x = var_1694, y = var_1704)[name = string("op_1710_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = 
attn_output_19_transpose_y_0, x = var_1710_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_1721 = const()[name = string("op_1721"), val = tensor([0, 2, 1, 3])]; + tensor var_1728 = const()[name = string("op_1728"), val = tensor([1, 1, -1])]; + tensor var_1722_cast_fp16 = transpose(perm = var_1721, x = attn_output_19_cast_fp16)[name = string("transpose_59")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_1728, x = var_1722_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1733 = const()[name = string("op_1733"), val = tensor([0, 2, 1])]; + string var_1749_pad_type_0 = const()[name = string("op_1749_pad_type_0"), val = string("valid")]; + int32 var_1749_groups_0 = const()[name = string("op_1749_groups_0"), val = int32(1)]; + tensor var_1749_strides_0 = const()[name = string("op_1749_strides_0"), val = tensor([1])]; + tensor var_1749_pad_0 = const()[name = string("op_1749_pad_0"), val = tensor([0, 0])]; + tensor var_1749_dilations_0 = const()[name = string("op_1749_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397518016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400139520))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1734_cast_fp16 = transpose(perm = var_1733, x = attn_output_21_cast_fp16)[name = string("transpose_58")]; + tensor var_1749_cast_fp16 = conv(dilations = var_1749_dilations_0, groups = var_1749_groups_0, pad = var_1749_pad_0, pad_type = var_1749_pad_type_0, strides = var_1749_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1734_cast_fp16)[name = string("op_1749_cast_fp16")]; + tensor var_1753 = const()[name = string("op_1753"), val = tensor([0, 2, 1])]; + int32 var_1759 = const()[name = string("op_1759"), val = 
int32(-1)]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_55_cast_fp16 = transpose(perm = var_1753, x = var_1749_cast_fp16)[name = string("transpose_57")]; + tensor var_1761_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1761_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_1759, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1761_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1756_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_1766_split_sizes_0 = const()[name = string("op_1766_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1766_axis_0 = const()[name = string("op_1766_axis_0"), val = int32(-1)]; + tensor var_1766_cast_fp16_0, tensor var_1766_cast_fp16_1 = split(axis = var_1766_axis_0, split_sizes = var_1766_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1766_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400142144)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_1766_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")]; + int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; + fp16 
const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1777_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1777_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_1775, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1777_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_1772_to_fp16 = const()[name = string("op_1772_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1772_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_1782_split_sizes_0 = const()[name = string("op_1782_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1782_axis_0 = const()[name = string("op_1782_axis_0"), val = int32(-1)]; + tensor var_1782_cast_fp16_0, tensor var_1782_cast_fp16_1 = split(axis = var_1782_axis_0, split_sizes = var_1782_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1782_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400147328)))]; + tensor h_21_cast_fp16 = mul(x = var_1782_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_1793 = const()[name = string("op_1793"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_1794 = transpose(perm = var_1793, x = h_21_cast_fp16)[name = string("transpose_56")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_1794)[name = 
string("input_85")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 
0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")]; + tensor var_1834_axes_0 = const()[name = string("op_1834_axes_0"), val = tensor([2])]; + tensor var_1834 = squeeze(axes = var_1834_axes_0, x = mlp_out_7)[name = string("op_1834")]; + tensor var_1838 = const()[name = string("op_1838"), val = tensor([0, 2, 1])]; + int32 var_1844 = const()[name = string("op_1844"), val = int32(-1)]; + fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor x_59 = transpose(perm = var_1838, x = var_1834)[name = string("transpose_55")]; + tensor var_1846 = mul(x = x_59, y = const_29_promoted)[name = string("op_1846")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89 = concat(axis = var_1844, interleave = input_89_interleave_0, values = (x_59, var_1846))[name = string("input_89")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_1841_to_fp16 = const()[name = string("op_1841_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1841_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")]; + tensor var_1851_split_sizes_0 = const()[name = string("op_1851_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1851_axis_0 = const()[name = string("op_1851_axis_0"), val = int32(-1)]; + tensor var_1851_0, tensor var_1851_1 = split(axis = var_1851_axis_0, split_sizes = var_1851_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1851")]; + 
tensor hidden_states_33 = mul(x = var_1851_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 9216])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 9472])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_1879 = const()[name = string("op_1879"), val = tensor([0, 2, 1])]; + tensor input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor([2])]; + tensor var_1880 = transpose(perm = var_1879, x = hidden_states_35_cast_fp16)[name = string("transpose_54")]; + tensor input_91 = expand_dims(axes = input_91_axes_0, x = var_1880)[name = string("input_91")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")]; + string 
gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_1899 = const()[name = string("op_1899"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_1900_cast_fp16 = transpose(perm = var_1899, x = per_layer_slice_7_cast_fp16)[name = string("transpose_53")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1900_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400152512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400480256))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")]; + 
tensor var_1916_axes_0 = const()[name = string("op_1916_axes_0"), val = tensor([2])]; + tensor var_1916_cast_fp16 = squeeze(axes = var_1916_axes_0, x = gated_23_cast_fp16)[name = string("op_1916_cast_fp16")]; + tensor var_1920 = const()[name = string("op_1920"), val = tensor([0, 2, 1])]; + int32 var_1926 = const()[name = string("op_1926"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_61_cast_fp16 = transpose(perm = var_1920, x = var_1916_cast_fp16)[name = string("transpose_52")]; + tensor var_1928_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1928_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95_cast_fp16 = concat(axis = var_1926, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1928_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_1923_to_fp16 = const()[name = string("op_1923_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1923_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor var_1933_split_sizes_0 = const()[name = string("op_1933_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1933_axis_0 = const()[name = string("op_1933_axis_0"), val = int32(-1)]; + tensor var_1933_cast_fp16_0, tensor var_1933_cast_fp16_1 = split(axis = var_1933_axis_0, split_sizes = var_1933_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1933_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400482880)))]; + tensor hidden_states_39_cast_fp16 = 
mul(x = var_1933_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor([0x1.b6p-1])]; + tensor x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")]; + int32 var_1948 = const()[name = string("op_1948"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1950_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_1950_cast_fp16")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97_cast_fp16 = concat(axis = var_1948, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_1950_cast_fp16))[name = string("input_97_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_1945_to_fp16 = const()[name = string("op_1945_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_1945_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_1955_split_sizes_0 = const()[name = string("op_1955_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1955_axis_0 = const()[name = string("op_1955_axis_0"), val = int32(-1)]; + tensor var_1955_cast_fp16_0, tensor var_1955_cast_fp16_1 = split(axis = var_1955_axis_0, split_sizes = var_1955_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_1955_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400488064)))]; + tensor h_25_cast_fp16 = mul(x = var_1955_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_1961 = const()[name = string("op_1961"), val = tensor([0, 2, 1])]; + tensor var_1964_axes_0 = const()[name = string("op_1964_axes_0"), val = tensor([2])]; + tensor var_1962_cast_fp16 = transpose(perm = var_1961, x = h_25_cast_fp16)[name = string("transpose_51")]; + tensor var_1964_cast_fp16 = expand_dims(axes = var_1964_axes_0, x = var_1962_cast_fp16)[name = string("op_1964_cast_fp16")]; + string var_1980_pad_type_0 = const()[name = string("op_1980_pad_type_0"), val = string("valid")]; + tensor var_1980_strides_0 = const()[name = string("op_1980_strides_0"), val = tensor([1, 1])]; + tensor var_1980_pad_0 = const()[name = string("op_1980_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1980_dilations_0 = const()[name = string("op_1980_dilations_0"), val = tensor([1, 1])]; + int32 var_1980_groups_0 = const()[name = string("op_1980_groups_0"), val = int32(1)]; + tensor var_1980 = conv(dilations = var_1980_dilations_0, groups = var_1980_groups_0, pad = var_1980_pad_0, pad_type = var_1980_pad_type_0, strides = var_1980_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_1964_cast_fp16)[name = string("op_1980")]; + tensor var_1985 = const()[name = string("op_1985"), val = tensor([1, 8, 256, 1])]; + tensor var_1986 = reshape(shape = var_1985, x = var_1980)[name = string("op_1986")]; + tensor var_1991 = const()[name = string("op_1991"), val = tensor([0, 1, 3, 2])]; + tensor var_2001 = const()[name = string("op_2001"), val = tensor([1, 8, 256])]; + tensor var_1992 = transpose(perm = var_1991, x = var_1986)[name = string("transpose_50")]; + tensor x_65 = reshape(shape = var_2001, x = var_1992)[name = string("x_65")]; + int32 var_2007 = const()[name = string("op_2007"), val = int32(-1)]; + fp16 
const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor var_2009 = mul(x = x_65, y = const_33_promoted)[name = string("op_2009")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2007, interleave = input_101_interleave_0, values = (x_65, var_2009))[name = string("input_101")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2004_to_fp16 = const()[name = string("op_2004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2004_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")]; + tensor var_2014_split_sizes_0 = const()[name = string("op_2014_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2014_axis_0 = const()[name = string("op_2014_axis_0"), val = int32(-1)]; + tensor var_2014_0, tensor var_2014_1 = split(axis = var_2014_axis_0, split_sizes = var_2014_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2014")]; + tensor var_2016 = mul(x = var_2014_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2016")]; + tensor var_2021 = const()[name = string("op_2021"), val = tensor([1, 8, 1, 256])]; + tensor q_27 = reshape(shape = var_2021, x = var_2016)[name = string("q_27")]; + tensor var_2023_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2023_cast_fp16")]; + tensor var_2024_split_sizes_0 = const()[name = string("op_2024_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2024_axis_0 = const()[name = string("op_2024_axis_0"), val = int32(-1)]; + tensor var_2024_0, tensor var_2024_1 = split(axis = var_2024_axis_0, split_sizes = var_2024_split_sizes_0, x = q_27)[name = string("op_2024")]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor var_2026 = mul(x = var_2024_1, y = const_34_promoted)[name = string("op_2026")]; + int32 
var_2028 = const()[name = string("op_2028"), val = int32(-1)]; + bool var_2029_interleave_0 = const()[name = string("op_2029_interleave_0"), val = bool(false)]; + tensor var_2029 = concat(axis = var_2028, interleave = var_2029_interleave_0, values = (var_2026, var_2024_0))[name = string("op_2029")]; + tensor var_2030_cast_fp16 = mul(x = var_2029, y = sin_s)[name = string("op_2030_cast_fp16")]; + tensor q_29_cast_fp16 = add(x = var_2023_cast_fp16, y = var_2030_cast_fp16)[name = string("q_29_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_29_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")]; + tensor var_2062 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2062")]; + tensor var_2068 = exp(x = var_2062)[name = string("op_2068")]; + tensor var_2078_axes_0 = const()[name = string("op_2078_axes_0"), val = tensor([-1])]; + bool var_2078_keep_dims_0 = const()[name = string("op_2078_keep_dims_0"), val = bool(true)]; + tensor var_2078 = reduce_sum(axes = var_2078_axes_0, keep_dims = var_2078_keep_dims_0, x = var_2068)[name = string("op_2078")]; + tensor var_2084_cast_fp16 = real_div(x = var_2068, y = var_2078)[name = 
string("op_2084_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_2084_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2095 = const()[name = string("op_2095"), val = tensor([0, 2, 1, 3])]; + tensor var_2102 = const()[name = string("op_2102"), val = tensor([1, 1, -1])]; + tensor var_2096_cast_fp16 = transpose(perm = var_2095, x = attn_output_25_cast_fp16)[name = string("transpose_49")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_2102, x = var_2096_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_2107 = const()[name = string("op_2107"), val = tensor([0, 2, 1])]; + string var_2123_pad_type_0 = const()[name = string("op_2123_pad_type_0"), val = string("valid")]; + int32 var_2123_groups_0 = const()[name = string("op_2123_groups_0"), val = int32(1)]; + tensor var_2123_strides_0 = const()[name = string("op_2123_strides_0"), val = tensor([1])]; + tensor var_2123_pad_0 = const()[name = string("op_2123_pad_0"), val = tensor([0, 0])]; + tensor var_2123_dilations_0 = const()[name = string("op_2123_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400493248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403114752))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2108_cast_fp16 = transpose(perm = var_2107, x = attn_output_27_cast_fp16)[name = string("transpose_48")]; + tensor var_2123_cast_fp16 = conv(dilations = var_2123_dilations_0, groups = 
var_2123_groups_0, pad = var_2123_pad_0, pad_type = var_2123_pad_type_0, strides = var_2123_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2108_cast_fp16)[name = string("op_2123_cast_fp16")]; + tensor var_2127 = const()[name = string("op_2127"), val = tensor([0, 2, 1])]; + int32 var_2133 = const()[name = string("op_2133"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2127, x = var_2123_cast_fp16)[name = string("transpose_47")]; + tensor var_2135_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2135_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_2133, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2135_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2130_to_fp16 = const()[name = string("op_2130_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2130_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor var_2140_split_sizes_0 = const()[name = string("op_2140_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2140_axis_0 = const()[name = string("op_2140_axis_0"), val = int32(-1)]; + tensor var_2140_cast_fp16_0, tensor var_2140_cast_fp16_1 = split(axis = var_2140_axis_0, split_sizes = var_2140_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2140_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403117376)))]; + tensor 
attn_output_29_cast_fp16 = mul(x = var_2140_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2149 = const()[name = string("op_2149"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2151_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2151_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_2149, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2151_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2146_to_fp16 = const()[name = string("op_2146_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2146_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_2156_split_sizes_0 = const()[name = string("op_2156_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2156_axis_0 = const()[name = string("op_2156_axis_0"), val = int32(-1)]; + tensor var_2156_cast_fp16_0, tensor var_2156_cast_fp16_1 = split(axis = var_2156_axis_0, split_sizes = var_2156_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2156_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403122560)))]; + tensor h_27_cast_fp16 = mul(x = var_2156_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = 
string("h_27_cast_fp16")]; + tensor var_2167 = const()[name = string("op_2167"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_2168 = transpose(perm = var_2167, x = h_27_cast_fp16)[name = string("transpose_46")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_2168)[name = string("input_109")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = 
string("gate_19")]; + tensor input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")]; + tensor var_2208_axes_0 = const()[name = string("op_2208_axes_0"), val = tensor([2])]; + tensor var_2208 = squeeze(axes = var_2208_axes_0, x = mlp_out_9)[name = string("op_2208")]; + tensor var_2212 = const()[name = string("op_2212"), val = tensor([0, 2, 1])]; + int32 var_2218 = const()[name = string("op_2218"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2212, x = var_2208)[name = string("transpose_45")]; + tensor var_2220 = mul(x = x_75, y = const_37_promoted)[name = string("op_2220")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_2218, interleave = input_113_interleave_0, values = (x_75, var_2220))[name = string("input_113")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_2215_to_fp16 = const()[name = string("op_2215_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2215_to_fp16, x = 
input_113)[name = string("normed_113_cast_fp16")]; + tensor var_2225_split_sizes_0 = const()[name = string("op_2225_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2225_axis_0 = const()[name = string("op_2225_axis_0"), val = int32(-1)]; + tensor var_2225_0, tensor var_2225_1 = split(axis = var_2225_axis_0, split_sizes = var_2225_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2225")]; + tensor hidden_states_43 = mul(x = var_2225_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 9472])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 1, 9728])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_2253 = const()[name = string("op_2253"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_2254 = transpose(perm = var_2253, x = hidden_states_45_cast_fp16)[name = string("transpose_44")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_2254)[name = string("input_115")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = 
string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_2273 = const()[name = string("op_2273"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_2274_cast_fp16 = transpose(perm = var_2273, x = per_layer_slice_9_cast_fp16)[name = string("transpose_43")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2274_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403127744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(403455488))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_2290_axes_0 = const()[name = string("op_2290_axes_0"), val = tensor([2])]; + tensor var_2290_cast_fp16 = squeeze(axes = var_2290_axes_0, x = gated_29_cast_fp16)[name = string("op_2290_cast_fp16")]; + tensor var_2294 = const()[name = string("op_2294"), val = tensor([0, 2, 1])]; + int32 var_2300 = const()[name = string("op_2300"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_2294, x = var_2290_cast_fp16)[name = string("transpose_42")]; + tensor var_2302_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2302_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_2300, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2302_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_2297_to_fp16 = const()[name = string("op_2297_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2297_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_2307_split_sizes_0 = const()[name = string("op_2307_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2307_axis_0 = const()[name = string("op_2307_axis_0"), val = int32(-1)]; + tensor var_2307_cast_fp16_0, tensor 
var_2307_cast_fp16_1 = split(axis = var_2307_axis_0, split_sizes = var_2307_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2307_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403458112)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_2307_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor([0x1.c6p-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_2322 = const()[name = string("op_2322"), val = int32(-1)]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2324_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2324_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_2322, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2324_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2319_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor var_2329_split_sizes_0 = const()[name = string("op_2329_split_sizes_0"), val 
= tensor([2560, 2560])]; + int32 var_2329_axis_0 = const()[name = string("op_2329_axis_0"), val = int32(-1)]; + tensor var_2329_cast_fp16_0, tensor var_2329_cast_fp16_1 = split(axis = var_2329_axis_0, split_sizes = var_2329_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2329_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403463296)))]; + tensor h_31_cast_fp16 = mul(x = var_2329_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_2335 = const()[name = string("op_2335"), val = tensor([0, 2, 1])]; + tensor var_2338_axes_0 = const()[name = string("op_2338_axes_0"), val = tensor([2])]; + tensor var_2336_cast_fp16 = transpose(perm = var_2335, x = h_31_cast_fp16)[name = string("transpose_41")]; + tensor var_2338_cast_fp16 = expand_dims(axes = var_2338_axes_0, x = var_2336_cast_fp16)[name = string("op_2338_cast_fp16")]; + string var_2354_pad_type_0 = const()[name = string("op_2354_pad_type_0"), val = string("valid")]; + tensor var_2354_strides_0 = const()[name = string("op_2354_strides_0"), val = tensor([1, 1])]; + tensor var_2354_pad_0 = const()[name = string("op_2354_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2354_dilations_0 = const()[name = string("op_2354_dilations_0"), val = tensor([1, 1])]; + int32 var_2354_groups_0 = const()[name = string("op_2354_groups_0"), val = int32(1)]; + tensor var_2354 = conv(dilations = var_2354_dilations_0, groups = var_2354_groups_0, pad = var_2354_pad_0, pad_type = var_2354_pad_type_0, strides = var_2354_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2338_cast_fp16)[name = string("op_2354")]; + tensor var_2359 = const()[name = string("op_2359"), val = tensor([1, 8, 256, 1])]; + tensor var_2360 = reshape(shape = var_2359, x = var_2354)[name = 
string("op_2360")]; + tensor var_2365 = const()[name = string("op_2365"), val = tensor([0, 1, 3, 2])]; + tensor var_2375 = const()[name = string("op_2375"), val = tensor([1, 8, 256])]; + tensor var_2366 = transpose(perm = var_2365, x = var_2360)[name = string("transpose_40")]; + tensor x_81 = reshape(shape = var_2375, x = var_2366)[name = string("x_81")]; + int32 var_2381 = const()[name = string("op_2381"), val = int32(-1)]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2383 = mul(x = x_81, y = const_41_promoted)[name = string("op_2383")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_2381, interleave = input_125_interleave_0, values = (x_81, var_2383))[name = string("input_125")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_2378_to_fp16 = const()[name = string("op_2378_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2378_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")]; + tensor var_2388_split_sizes_0 = const()[name = string("op_2388_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2388_axis_0 = const()[name = string("op_2388_axis_0"), val = int32(-1)]; + tensor var_2388_0, tensor var_2388_1 = split(axis = var_2388_axis_0, split_sizes = var_2388_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2388")]; + tensor var_2390 = mul(x = var_2388_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2390")]; + tensor var_2395 = const()[name = string("op_2395"), val = tensor([1, 8, 1, 256])]; + tensor q_33 = reshape(shape = var_2395, x = var_2390)[name = string("q_33")]; + tensor var_2397_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2397_cast_fp16")]; + tensor var_2398_split_sizes_0 = const()[name = string("op_2398_split_sizes_0"), val = 
tensor([128, 128])]; + int32 var_2398_axis_0 = const()[name = string("op_2398_axis_0"), val = int32(-1)]; + tensor var_2398_0, tensor var_2398_1 = split(axis = var_2398_axis_0, split_sizes = var_2398_split_sizes_0, x = q_33)[name = string("op_2398")]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2400 = mul(x = var_2398_1, y = const_42_promoted)[name = string("op_2400")]; + int32 var_2402 = const()[name = string("op_2402"), val = int32(-1)]; + bool var_2403_interleave_0 = const()[name = string("op_2403_interleave_0"), val = bool(false)]; + tensor var_2403 = concat(axis = var_2402, interleave = var_2403_interleave_0, values = (var_2400, var_2398_0))[name = string("op_2403")]; + tensor var_2404_cast_fp16 = mul(x = var_2403, y = sin_s)[name = string("op_2404_cast_fp16")]; + tensor q_35_cast_fp16 = add(x = var_2397_cast_fp16, y = var_2404_cast_fp16)[name = string("q_35_cast_fp16")]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_35_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_sliding)[name = string("x_83_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")]; + tensor var_2436 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2436")]; + tensor var_2442 = exp(x 
= var_2436)[name = string("op_2442")]; + tensor var_2452_axes_0 = const()[name = string("op_2452_axes_0"), val = tensor([-1])]; + bool var_2452_keep_dims_0 = const()[name = string("op_2452_keep_dims_0"), val = bool(true)]; + tensor var_2452 = reduce_sum(axes = var_2452_axes_0, keep_dims = var_2452_keep_dims_0, x = var_2442)[name = string("op_2452")]; + tensor var_2458_cast_fp16 = real_div(x = var_2442, y = var_2452)[name = string("op_2458_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2458_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_2469 = const()[name = string("op_2469"), val = tensor([0, 2, 1, 3])]; + tensor var_2476 = const()[name = string("op_2476"), val = tensor([1, 1, -1])]; + tensor var_2470_cast_fp16 = transpose(perm = var_2469, x = attn_output_31_cast_fp16)[name = string("transpose_39")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_2476, x = var_2470_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_2481 = const()[name = string("op_2481"), val = tensor([0, 2, 1])]; + string var_2497_pad_type_0 = const()[name = string("op_2497_pad_type_0"), val = string("valid")]; + int32 var_2497_groups_0 = const()[name = string("op_2497_groups_0"), val = int32(1)]; + tensor var_2497_strides_0 = const()[name = string("op_2497_strides_0"), val = tensor([1])]; + tensor var_2497_pad_0 = const()[name = string("op_2497_pad_0"), val = tensor([0, 0])]; + tensor var_2497_dilations_0 = const()[name = string("op_2497_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(403468480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406089984))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2482_cast_fp16 = transpose(perm = var_2481, x = attn_output_33_cast_fp16)[name = string("transpose_38")]; + tensor var_2497_cast_fp16 = conv(dilations = var_2497_dilations_0, groups = var_2497_groups_0, pad = var_2497_pad_0, pad_type = var_2497_pad_type_0, strides = var_2497_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2482_cast_fp16)[name = string("op_2497_cast_fp16")]; + tensor var_2501 = const()[name = string("op_2501"), val = tensor([0, 2, 1])]; + int32 var_2507 = const()[name = string("op_2507"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_87_cast_fp16 = transpose(perm = var_2501, x = var_2497_cast_fp16)[name = string("transpose_37")]; + tensor var_2509_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2509_cast_fp16")]; + bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129_cast_fp16 = concat(axis = var_2507, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2509_cast_fp16))[name = string("input_129_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_2504_to_fp16 = const()[name = string("op_2504_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2504_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_2514_split_sizes_0 = const()[name = string("op_2514_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2514_axis_0 = const()[name = string("op_2514_axis_0"), val = int32(-1)]; + tensor 
var_2514_cast_fp16_0, tensor var_2514_cast_fp16_1 = split(axis = var_2514_axis_0, split_sizes = var_2514_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2514_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406092608)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_2514_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_2523 = const()[name = string("op_2523"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2525_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2525_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_2523, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2525_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_2520_to_fp16 = const()[name = string("op_2520_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2520_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_2530_split_sizes_0 = const()[name = string("op_2530_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2530_axis_0 = const()[name = string("op_2530_axis_0"), val = int32(-1)]; + tensor var_2530_cast_fp16_0, tensor var_2530_cast_fp16_1 = split(axis = var_2530_axis_0, split_sizes = var_2530_split_sizes_0, x = 
normed_133_cast_fp16)[name = string("op_2530_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406097792)))]; + tensor h_33_cast_fp16 = mul(x = var_2530_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_2541 = const()[name = string("op_2541"), val = tensor([0, 2, 1])]; + tensor input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor([2])]; + tensor var_2542 = transpose(perm = var_2541, x = h_33_cast_fp16)[name = string("transpose_36")]; + tensor input_133 = expand_dims(axes = input_133_axes_0, x = var_2542)[name = string("input_133")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = 
string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")]; + tensor var_2582_axes_0 = const()[name = string("op_2582_axes_0"), val = tensor([2])]; + tensor var_2582 = squeeze(axes = var_2582_axes_0, x = mlp_out_11)[name = string("op_2582")]; + tensor var_2586 = const()[name = string("op_2586"), val = tensor([0, 2, 1])]; + int32 var_2592 = const()[name = string("op_2592"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_91 = transpose(perm = var_2586, x = var_2582)[name = string("transpose_35")]; + tensor var_2594 = mul(x = x_91, y = const_45_promoted)[name = string("op_2594")]; + bool input_137_interleave_0 = const()[name = 
string("input_137_interleave_0"), val = bool(false)]; + tensor input_137 = concat(axis = var_2592, interleave = input_137_interleave_0, values = (x_91, var_2594))[name = string("input_137")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_2589_to_fp16 = const()[name = string("op_2589_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2589_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")]; + tensor var_2599_split_sizes_0 = const()[name = string("op_2599_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2599_axis_0 = const()[name = string("op_2599_axis_0"), val = int32(-1)]; + tensor var_2599_0, tensor var_2599_1 = split(axis = var_2599_axis_0, split_sizes = var_2599_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2599")]; + tensor hidden_states_53 = mul(x = var_2599_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 9728])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 9984])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_2627 = const()[name = string("op_2627"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_2628 = transpose(perm = var_2627, x = 
hidden_states_55_cast_fp16)[name = string("transpose_34")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_2628)[name = string("input_139")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_2647 = const()[name = string("op_2647"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_2648_cast_fp16 = transpose(perm = var_2647, x = per_layer_slice_11_cast_fp16)[name = string("transpose_33")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2648_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = 
string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406102976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406430720))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_2664_axes_0 = const()[name = string("op_2664_axes_0"), val = tensor([2])]; + tensor var_2664_cast_fp16 = squeeze(axes = var_2664_axes_0, x = gated_35_cast_fp16)[name = string("op_2664_cast_fp16")]; + tensor var_2668 = const()[name = string("op_2668"), val = tensor([0, 2, 1])]; + int32 var_2674 = const()[name = string("op_2674"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_93_cast_fp16 = transpose(perm = var_2668, x = var_2664_cast_fp16)[name = string("transpose_32")]; + tensor var_2676_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2676_cast_fp16")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143_cast_fp16 = concat(axis = var_2674, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2676_cast_fp16))[name = string("input_143_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = 
string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_2671_to_fp16 = const()[name = string("op_2671_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2671_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_2681_split_sizes_0 = const()[name = string("op_2681_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2681_axis_0 = const()[name = string("op_2681_axis_0"), val = int32(-1)]; + tensor var_2681_cast_fp16_0, tensor var_2681_cast_fp16_1 = split(axis = var_2681_axis_0, split_sizes = var_2681_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2681_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406433344)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_2681_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.c4p-1])]; + tensor x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")]; + int32 var_2696 = const()[name = string("op_2696"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2698_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2698_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_2696, interleave = 
input_145_interleave_0, values = (x_95_cast_fp16, var_2698_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_2693_to_fp16 = const()[name = string("op_2693_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2693_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_2703_split_sizes_0 = const()[name = string("op_2703_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2703_axis_0 = const()[name = string("op_2703_axis_0"), val = int32(-1)]; + tensor var_2703_cast_fp16_0, tensor var_2703_cast_fp16_1 = split(axis = var_2703_axis_0, split_sizes = var_2703_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2703_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406438528)))]; + tensor h_37_cast_fp16 = mul(x = var_2703_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_2709 = const()[name = string("op_2709"), val = tensor([0, 2, 1])]; + tensor var_2712_axes_0 = const()[name = string("op_2712_axes_0"), val = tensor([2])]; + tensor var_2710_cast_fp16 = transpose(perm = var_2709, x = h_37_cast_fp16)[name = string("transpose_31")]; + tensor var_2712_cast_fp16 = expand_dims(axes = var_2712_axes_0, x = var_2710_cast_fp16)[name = string("op_2712_cast_fp16")]; + string var_2728_pad_type_0 = const()[name = string("op_2728_pad_type_0"), val = string("valid")]; + tensor var_2728_strides_0 = const()[name = string("op_2728_strides_0"), val = tensor([1, 1])]; + tensor var_2728_pad_0 = const()[name = string("op_2728_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2728_dilations_0 = const()[name = 
string("op_2728_dilations_0"), val = tensor([1, 1])]; + int32 var_2728_groups_0 = const()[name = string("op_2728_groups_0"), val = int32(1)]; + tensor var_2728 = conv(dilations = var_2728_dilations_0, groups = var_2728_groups_0, pad = var_2728_pad_0, pad_type = var_2728_pad_type_0, strides = var_2728_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2712_cast_fp16)[name = string("op_2728")]; + tensor var_2733 = const()[name = string("op_2733"), val = tensor([1, 8, 256, 1])]; + tensor var_2734 = reshape(shape = var_2733, x = var_2728)[name = string("op_2734")]; + tensor var_2739 = const()[name = string("op_2739"), val = tensor([0, 1, 3, 2])]; + tensor var_2749 = const()[name = string("op_2749"), val = tensor([1, 8, 256])]; + tensor var_2740 = transpose(perm = var_2739, x = var_2734)[name = string("transpose_30")]; + tensor x_97 = reshape(shape = var_2749, x = var_2740)[name = string("x_97")]; + int32 var_2755 = const()[name = string("op_2755"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_2757 = mul(x = x_97, y = const_49_promoted)[name = string("op_2757")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_2755, interleave = input_149_interleave_0, values = (x_97, var_2757))[name = string("input_149")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_2752_to_fp16 = const()[name = string("op_2752_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2752_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")]; + tensor var_2762_split_sizes_0 = const()[name = string("op_2762_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2762_axis_0 = const()[name = string("op_2762_axis_0"), val = int32(-1)]; + tensor var_2762_0, tensor var_2762_1 = split(axis 
= var_2762_axis_0, split_sizes = var_2762_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2762")]; + tensor var_2764 = mul(x = var_2762_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2764")]; + tensor var_2769 = const()[name = string("op_2769"), val = tensor([1, 8, 1, 256])]; + tensor q_39 = reshape(shape = var_2769, x = var_2764)[name = string("q_39")]; + tensor var_2771_cast_fp16 = mul(x = q_39, y = cos_s)[name = string("op_2771_cast_fp16")]; + tensor var_2772_split_sizes_0 = const()[name = string("op_2772_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2772_axis_0 = const()[name = string("op_2772_axis_0"), val = int32(-1)]; + tensor var_2772_0, tensor var_2772_1 = split(axis = var_2772_axis_0, split_sizes = var_2772_split_sizes_0, x = q_39)[name = string("op_2772")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_2774 = mul(x = var_2772_1, y = const_50_promoted)[name = string("op_2774")]; + int32 var_2776 = const()[name = string("op_2776"), val = int32(-1)]; + bool var_2777_interleave_0 = const()[name = string("op_2777_interleave_0"), val = bool(false)]; + tensor var_2777 = concat(axis = var_2776, interleave = var_2777_interleave_0, values = (var_2774, var_2772_0))[name = string("op_2777")]; + tensor var_2778_cast_fp16 = mul(x = var_2777, y = sin_s)[name = string("op_2778_cast_fp16")]; + tensor q_41_cast_fp16 = add(x = var_2771_cast_fp16, y = var_2778_cast_fp16)[name = string("q_41_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_41_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_99_cast_fp16 = 
add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = string("reduce_max_6")]; + tensor var_2810 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2810")]; + tensor var_2816 = exp(x = var_2810)[name = string("op_2816")]; + tensor var_2826_axes_0 = const()[name = string("op_2826_axes_0"), val = tensor([-1])]; + bool var_2826_keep_dims_0 = const()[name = string("op_2826_keep_dims_0"), val = bool(true)]; + tensor var_2826 = reduce_sum(axes = var_2826_axes_0, keep_dims = var_2826_keep_dims_0, x = var_2816)[name = string("op_2826")]; + tensor var_2832_cast_fp16 = real_div(x = var_2816, y = var_2826)[name = string("op_2832_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_2832_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_2843 = const()[name = string("op_2843"), val = tensor([0, 2, 1, 3])]; + tensor var_2850 = const()[name = string("op_2850"), val = tensor([1, 1, -1])]; + tensor var_2844_cast_fp16 = transpose(perm = var_2843, x = attn_output_37_cast_fp16)[name = string("transpose_29")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_2850, x = var_2844_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_2855 = const()[name = string("op_2855"), val = tensor([0, 2, 1])]; + string var_2871_pad_type_0 = 
const()[name = string("op_2871_pad_type_0"), val = string("valid")]; + int32 var_2871_groups_0 = const()[name = string("op_2871_groups_0"), val = int32(1)]; + tensor var_2871_strides_0 = const()[name = string("op_2871_strides_0"), val = tensor([1])]; + tensor var_2871_pad_0 = const()[name = string("op_2871_pad_0"), val = tensor([0, 0])]; + tensor var_2871_dilations_0 = const()[name = string("op_2871_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406443712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409065216))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2856_cast_fp16 = transpose(perm = var_2855, x = attn_output_39_cast_fp16)[name = string("transpose_28")]; + tensor var_2871_cast_fp16 = conv(dilations = var_2871_dilations_0, groups = var_2871_groups_0, pad = var_2871_pad_0, pad_type = var_2871_pad_type_0, strides = var_2871_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2856_cast_fp16)[name = string("op_2871_cast_fp16")]; + tensor var_2875 = const()[name = string("op_2875"), val = tensor([0, 2, 1])]; + int32 var_2881 = const()[name = string("op_2881"), val = int32(-1)]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_103_cast_fp16 = transpose(perm = var_2875, x = var_2871_cast_fp16)[name = string("transpose_27")]; + tensor var_2883_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2883_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_2881, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2883_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor 
normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_2878_to_fp16 = const()[name = string("op_2878_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_2878_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_2888_split_sizes_0 = const()[name = string("op_2888_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2888_axis_0 = const()[name = string("op_2888_axis_0"), val = int32(-1)]; + tensor var_2888_cast_fp16_0, tensor var_2888_cast_fp16_1 = split(axis = var_2888_axis_0, split_sizes = var_2888_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2888_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409067840)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_2888_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")]; + int32 var_2897 = const()[name = string("op_2897"), val = int32(-1)]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2899_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2899_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_2897, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2899_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_2894_to_fp16 = const()[name = 
string("op_2894_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2894_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_2904_split_sizes_0 = const()[name = string("op_2904_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2904_axis_0 = const()[name = string("op_2904_axis_0"), val = int32(-1)]; + tensor var_2904_cast_fp16_0, tensor var_2904_cast_fp16_1 = split(axis = var_2904_axis_0, split_sizes = var_2904_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_2904_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409073024)))]; + tensor h_39_cast_fp16 = mul(x = var_2904_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_2915 = const()[name = string("op_2915"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_2916 = transpose(perm = var_2915, x = h_39_cast_fp16)[name = string("transpose_26")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_2916)[name = string("input_157")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, 
strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")]; + tensor var_2956_axes_0 = const()[name = string("op_2956_axes_0"), val = tensor([2])]; + tensor var_2956 = squeeze(axes = 
var_2956_axes_0, x = mlp_out_13)[name = string("op_2956")]; + tensor var_2960 = const()[name = string("op_2960"), val = tensor([0, 2, 1])]; + int32 var_2966 = const()[name = string("op_2966"), val = int32(-1)]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor x_107 = transpose(perm = var_2960, x = var_2956)[name = string("transpose_25")]; + tensor var_2968 = mul(x = x_107, y = const_53_promoted)[name = string("op_2968")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_2966, interleave = input_161_interleave_0, values = (x_107, var_2968))[name = string("input_161")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_2963_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")]; + tensor var_2973_split_sizes_0 = const()[name = string("op_2973_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2973_axis_0 = const()[name = string("op_2973_axis_0"), val = int32(-1)]; + tensor var_2973_0, tensor var_2973_1 = split(axis = var_2973_axis_0, split_sizes = var_2973_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_2973")]; + tensor hidden_states_63 = mul(x = var_2973_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 9984])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 10240])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = 
string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_3001 = const()[name = string("op_3001"), val = tensor([0, 2, 1])]; + tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; + tensor var_3002 = transpose(perm = var_3001, x = hidden_states_65_cast_fp16)[name = string("transpose_24")]; + tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_3002)[name = string("input_163")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_3021 = const()[name = string("op_3021"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_3022_cast_fp16 = transpose(perm = var_3021, x = per_layer_slice_13_cast_fp16)[name = string("transpose_23")]; + tensor 
per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3022_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409078208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409405952))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_3038_axes_0 = const()[name = string("op_3038_axes_0"), val = tensor([2])]; + tensor var_3038_cast_fp16 = squeeze(axes = var_3038_axes_0, x = gated_41_cast_fp16)[name = string("op_3038_cast_fp16")]; + tensor var_3042 = const()[name = string("op_3042"), val = tensor([0, 2, 1])]; + int32 var_3048 = const()[name = string("op_3048"), val = int32(-1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_109_cast_fp16 = transpose(perm = 
var_3042, x = var_3038_cast_fp16)[name = string("transpose_22")]; + tensor var_3050_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3050_cast_fp16")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_3048, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3050_cast_fp16))[name = string("input_167_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_3045_to_fp16 = const()[name = string("op_3045_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3045_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_3055_split_sizes_0 = const()[name = string("op_3055_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3055_axis_0 = const()[name = string("op_3055_axis_0"), val = int32(-1)]; + tensor var_3055_cast_fp16_0, tensor var_3055_cast_fp16_1 = split(axis = var_3055_axis_0, split_sizes = var_3055_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3055_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409408576)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_3055_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor([0x1.b6p-1])]; + tensor x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = 
const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")]; + int32 var_3070 = const()[name = string("op_3070"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3072_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3072_cast_fp16")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169_cast_fp16 = concat(axis = var_3070, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3072_cast_fp16))[name = string("input_169_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_3067_to_fp16 = const()[name = string("op_3067_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3067_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_3077_split_sizes_0 = const()[name = string("op_3077_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3077_axis_0 = const()[name = string("op_3077_axis_0"), val = int32(-1)]; + tensor var_3077_cast_fp16_0, tensor var_3077_cast_fp16_1 = split(axis = var_3077_axis_0, split_sizes = var_3077_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3077_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409413760)))]; + tensor h_43_cast_fp16 = mul(x = var_3077_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_3083 = const()[name = string("op_3083"), val = tensor([0, 2, 1])]; + tensor var_3086_axes_0 = const()[name = string("op_3086_axes_0"), val = tensor([2])]; + tensor var_3084_cast_fp16 = transpose(perm = var_3083, x = 
h_43_cast_fp16)[name = string("transpose_21")]; + tensor var_3086_cast_fp16 = expand_dims(axes = var_3086_axes_0, x = var_3084_cast_fp16)[name = string("op_3086_cast_fp16")]; + string var_3102_pad_type_0 = const()[name = string("op_3102_pad_type_0"), val = string("valid")]; + tensor var_3102_strides_0 = const()[name = string("op_3102_strides_0"), val = tensor([1, 1])]; + tensor var_3102_pad_0 = const()[name = string("op_3102_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3102_dilations_0 = const()[name = string("op_3102_dilations_0"), val = tensor([1, 1])]; + int32 var_3102_groups_0 = const()[name = string("op_3102_groups_0"), val = int32(1)]; + tensor var_3102 = conv(dilations = var_3102_dilations_0, groups = var_3102_groups_0, pad = var_3102_pad_0, pad_type = var_3102_pad_type_0, strides = var_3102_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3086_cast_fp16)[name = string("op_3102")]; + tensor var_3107 = const()[name = string("op_3107"), val = tensor([1, 8, 256, 1])]; + tensor var_3108 = reshape(shape = var_3107, x = var_3102)[name = string("op_3108")]; + tensor var_3113 = const()[name = string("op_3113"), val = tensor([0, 1, 3, 2])]; + tensor var_3123 = const()[name = string("op_3123"), val = tensor([1, 8, 256])]; + tensor var_3114 = transpose(perm = var_3113, x = var_3108)[name = string("transpose_20")]; + tensor x_113 = reshape(shape = var_3123, x = var_3114)[name = string("x_113")]; + int32 var_3129 = const()[name = string("op_3129"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor var_3131 = mul(x = x_113, y = const_57_promoted)[name = string("op_3131")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_3129, interleave = input_173_interleave_0, values = (x_113, var_3131))[name = string("input_173")]; + tensor normed_173_axes_0 = const()[name = 
string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_3126_to_fp16 = const()[name = string("op_3126_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3126_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")]; + tensor var_3136_split_sizes_0 = const()[name = string("op_3136_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3136_axis_0 = const()[name = string("op_3136_axis_0"), val = int32(-1)]; + tensor var_3136_0, tensor var_3136_1 = split(axis = var_3136_axis_0, split_sizes = var_3136_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3136")]; + tensor var_3138 = mul(x = var_3136_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3138")]; + tensor var_3143 = const()[name = string("op_3143"), val = tensor([1, 8, 1, 256])]; + tensor q_45 = reshape(shape = var_3143, x = var_3138)[name = string("q_45")]; + tensor var_3145_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_3145_cast_fp16")]; + tensor var_3146_split_sizes_0 = const()[name = string("op_3146_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3146_axis_0 = const()[name = string("op_3146_axis_0"), val = int32(-1)]; + tensor var_3146_0, tensor var_3146_1 = split(axis = var_3146_axis_0, split_sizes = var_3146_split_sizes_0, x = q_45)[name = string("op_3146")]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor var_3148 = mul(x = var_3146_1, y = const_58_promoted)[name = string("op_3148")]; + int32 var_3150 = const()[name = string("op_3150"), val = int32(-1)]; + bool var_3151_interleave_0 = const()[name = string("op_3151_interleave_0"), val = bool(false)]; + tensor var_3151 = concat(axis = var_3150, interleave = var_3151_interleave_0, values = (var_3148, var_3146_0))[name = string("op_3151")]; + tensor var_3152_cast_fp16 = mul(x = var_3151, y = sin_s)[name = string("op_3152_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_3145_cast_fp16, y = 
var_3152_cast_fp16)[name = string("q_47_cast_fp16")]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_47_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")]; + tensor var_3184 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3184")]; + tensor var_3190 = exp(x = var_3184)[name = string("op_3190")]; + tensor var_3200_axes_0 = const()[name = string("op_3200_axes_0"), val = tensor([-1])]; + bool var_3200_keep_dims_0 = const()[name = string("op_3200_keep_dims_0"), val = bool(true)]; + tensor var_3200 = reduce_sum(axes = var_3200_axes_0, keep_dims = var_3200_keep_dims_0, x = var_3190)[name = string("op_3200")]; + tensor var_3206_cast_fp16 = real_div(x = var_3190, y = var_3200)[name = string("op_3206_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3206_cast_fp16, y = V_expanded_1_cast_fp16)[name = 
string("attn_output_43_cast_fp16")]; + tensor var_3217 = const()[name = string("op_3217"), val = tensor([0, 2, 1, 3])]; + tensor var_3224 = const()[name = string("op_3224"), val = tensor([1, 1, -1])]; + tensor var_3218_cast_fp16 = transpose(perm = var_3217, x = attn_output_43_cast_fp16)[name = string("transpose_19")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_3224, x = var_3218_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3229 = const()[name = string("op_3229"), val = tensor([0, 2, 1])]; + string var_3245_pad_type_0 = const()[name = string("op_3245_pad_type_0"), val = string("valid")]; + int32 var_3245_groups_0 = const()[name = string("op_3245_groups_0"), val = int32(1)]; + tensor var_3245_strides_0 = const()[name = string("op_3245_strides_0"), val = tensor([1])]; + tensor var_3245_pad_0 = const()[name = string("op_3245_pad_0"), val = tensor([0, 0])]; + tensor var_3245_dilations_0 = const()[name = string("op_3245_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409418944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412040448))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3230_cast_fp16 = transpose(perm = var_3229, x = attn_output_45_cast_fp16)[name = string("transpose_18")]; + tensor var_3245_cast_fp16 = conv(dilations = var_3245_dilations_0, groups = var_3245_groups_0, pad = var_3245_pad_0, pad_type = var_3245_pad_type_0, strides = var_3245_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3230_cast_fp16)[name = string("op_3245_cast_fp16")]; + tensor var_3249 = const()[name = string("op_3249"), val = tensor([0, 2, 1])]; + int32 var_3255 = const()[name = string("op_3255"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = 
string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_119_cast_fp16 = transpose(perm = var_3249, x = var_3245_cast_fp16)[name = string("transpose_17")]; + tensor var_3257_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3257_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_3255, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3257_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_3252_to_fp16 = const()[name = string("op_3252_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3252_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor var_3262_split_sizes_0 = const()[name = string("op_3262_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3262_axis_0 = const()[name = string("op_3262_axis_0"), val = int32(-1)]; + tensor var_3262_cast_fp16_0, tensor var_3262_cast_fp16_1 = split(axis = var_3262_axis_0, split_sizes = var_3262_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3262_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412043072)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_3262_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")]; + int32 var_3271 = const()[name = string("op_3271"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = 
string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3273_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3273_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_3271, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3273_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3268_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_3278_split_sizes_0 = const()[name = string("op_3278_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3278_axis_0 = const()[name = string("op_3278_axis_0"), val = int32(-1)]; + tensor var_3278_cast_fp16_0, tensor var_3278_cast_fp16_1 = split(axis = var_3278_axis_0, split_sizes = var_3278_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412048256)))]; + tensor h_45_cast_fp16 = mul(x = var_3278_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_3289 = const()[name = string("op_3289"), val = tensor([0, 2, 1])]; + tensor input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor([2])]; + tensor var_3290 = transpose(perm = var_3289, x = h_45_cast_fp16)[name = string("transpose_16")]; + tensor input_181 = expand_dims(axes = input_181_axes_0, x = var_3290)[name = string("input_181")]; + string 
gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; 
+ tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")]; + tensor var_3330_axes_0 = const()[name = string("op_3330_axes_0"), val = tensor([2])]; + tensor var_3330 = squeeze(axes = var_3330_axes_0, x = mlp_out_15)[name = string("op_3330")]; + tensor var_3334 = const()[name = string("op_3334"), val = tensor([0, 2, 1])]; + int32 var_3340 = const()[name = string("op_3340"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor x_123 = transpose(perm = var_3334, x = var_3330)[name = string("transpose_15")]; + tensor var_3342 = mul(x = x_123, y = const_61_promoted)[name = string("op_3342")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_3340, interleave = input_185_interleave_0, values = (x_123, var_3342))[name = string("input_185")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_3337_to_fp16 = const()[name = string("op_3337_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3337_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")]; + tensor var_3347_split_sizes_0 = const()[name = string("op_3347_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3347_axis_0 = const()[name = string("op_3347_axis_0"), val = int32(-1)]; + tensor var_3347_0, tensor var_3347_1 = split(axis = var_3347_axis_0, split_sizes = var_3347_split_sizes_0, x = normed_185_cast_fp16)[name = 
string("op_3347")]; + tensor hidden_states_73 = mul(x = var_3347_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 10240])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 10496])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_3375 = const()[name = string("op_3375"), val = tensor([0, 2, 1])]; + tensor input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor([2])]; + tensor var_3376 = transpose(perm = var_3375, x = hidden_states_75_cast_fp16)[name = string("transpose_14")]; + tensor input_187 = expand_dims(axes = input_187_axes_0, x = var_3376)[name = string("input_187")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = 
input_187)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_3395 = const()[name = string("op_3395"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_3396_cast_fp16 = transpose(perm = var_3395, x = per_layer_slice_15_cast_fp16)[name = string("transpose_13")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3396_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412053440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412381184))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = 
input_189_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_3412_axes_0 = const()[name = string("op_3412_axes_0"), val = tensor([2])]; + tensor var_3412_cast_fp16 = squeeze(axes = var_3412_axes_0, x = gated_47_cast_fp16)[name = string("op_3412_cast_fp16")]; + tensor var_3416 = const()[name = string("op_3416"), val = tensor([0, 2, 1])]; + int32 var_3422 = const()[name = string("op_3422"), val = int32(-1)]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_125_cast_fp16 = transpose(perm = var_3416, x = var_3412_cast_fp16)[name = string("transpose_12")]; + tensor var_3424_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3424_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_3422, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3424_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_3419_to_fp16 = const()[name = string("op_3419_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3419_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_3429_split_sizes_0 = const()[name = string("op_3429_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3429_axis_0 = const()[name = string("op_3429_axis_0"), val = int32(-1)]; + tensor var_3429_cast_fp16_0, tensor var_3429_cast_fp16_1 = split(axis = var_3429_axis_0, split_sizes = var_3429_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3429_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(412383808)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_3429_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor([0x1.9ep-1])]; + tensor x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")]; + int32 var_3444 = const()[name = string("op_3444"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3446_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3446_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_3444, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3446_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_3441_to_fp16 = const()[name = string("op_3441_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3441_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_3451_split_sizes_0 = const()[name = string("op_3451_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3451_axis_0 = const()[name = string("op_3451_axis_0"), val = int32(-1)]; + tensor var_3451_cast_fp16_0, tensor var_3451_cast_fp16_1 = split(axis = var_3451_axis_0, split_sizes = var_3451_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3451_cast_fp16")]; + tensor 
layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412388992)))]; + tensor h_49_cast_fp16 = mul(x = var_3451_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_3457 = const()[name = string("op_3457"), val = tensor([0, 2, 1])]; + tensor var_3460_axes_0 = const()[name = string("op_3460_axes_0"), val = tensor([2])]; + tensor var_3458_cast_fp16 = transpose(perm = var_3457, x = h_49_cast_fp16)[name = string("transpose_11")]; + tensor var_3460_cast_fp16 = expand_dims(axes = var_3460_axes_0, x = var_3458_cast_fp16)[name = string("op_3460_cast_fp16")]; + string var_3476_pad_type_0 = const()[name = string("op_3476_pad_type_0"), val = string("valid")]; + tensor var_3476_strides_0 = const()[name = string("op_3476_strides_0"), val = tensor([1, 1])]; + tensor var_3476_pad_0 = const()[name = string("op_3476_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3476_dilations_0 = const()[name = string("op_3476_dilations_0"), val = tensor([1, 1])]; + int32 var_3476_groups_0 = const()[name = string("op_3476_groups_0"), val = int32(1)]; + tensor var_3476 = conv(dilations = var_3476_dilations_0, groups = var_3476_groups_0, pad = var_3476_pad_0, pad_type = var_3476_pad_type_0, strides = var_3476_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3460_cast_fp16)[name = string("op_3476")]; + tensor var_3481 = const()[name = string("op_3481"), val = tensor([1, 8, 512, 1])]; + tensor var_3482 = reshape(shape = var_3481, x = var_3476)[name = string("op_3482")]; + tensor var_3487 = const()[name = string("op_3487"), val = tensor([0, 1, 3, 2])]; + tensor var_3497 = const()[name = string("op_3497"), val = tensor([1, 8, 512])]; + tensor var_3488 = transpose(perm = var_3487, x = var_3482)[name = string("transpose_10")]; + tensor x_129 = reshape(shape = 
var_3497, x = var_3488)[name = string("x_129")]; + int32 var_3503 = const()[name = string("op_3503"), val = int32(-1)]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_3505 = mul(x = x_129, y = const_65_promoted)[name = string("op_3505")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + tensor input_197 = concat(axis = var_3503, interleave = input_197_interleave_0, values = (x_129, var_3505))[name = string("input_197")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3500_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")]; + tensor var_3510_split_sizes_0 = const()[name = string("op_3510_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3510_axis_0 = const()[name = string("op_3510_axis_0"), val = int32(-1)]; + tensor var_3510_0, tensor var_3510_1 = split(axis = var_3510_axis_0, split_sizes = var_3510_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3510")]; + tensor var_3512 = mul(x = var_3510_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_3512")]; + tensor var_3517 = const()[name = string("op_3517"), val = tensor([1, 8, 1, 512])]; + tensor q_51 = reshape(shape = var_3517, x = var_3512)[name = string("q_51")]; + tensor var_3519_cast_fp16 = mul(x = q_51, y = cos_f)[name = string("op_3519_cast_fp16")]; + tensor var_3520_split_sizes_0 = const()[name = string("op_3520_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3520_axis_0 = const()[name = string("op_3520_axis_0"), val = int32(-1)]; + tensor var_3520_0, tensor var_3520_1 = split(axis = var_3520_axis_0, split_sizes = var_3520_split_sizes_0, x = q_51)[name = string("op_3520")]; + fp16 const_66_promoted = const()[name = 
string("const_66_promoted"), val = fp16(-0x1p+0)]; + tensor var_3522 = mul(x = var_3520_1, y = const_66_promoted)[name = string("op_3522")]; + int32 var_3524 = const()[name = string("op_3524"), val = int32(-1)]; + bool var_3525_interleave_0 = const()[name = string("op_3525_interleave_0"), val = bool(false)]; + tensor var_3525 = concat(axis = var_3524, interleave = var_3525_interleave_0, values = (var_3522, var_3520_0))[name = string("op_3525")]; + tensor var_3526_cast_fp16 = mul(x = var_3525, y = sin_f)[name = string("op_3526_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_3519_cast_fp16, y = var_3526_cast_fp16)[name = string("q_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_cast_fp16, y = transpose_38_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_full)[name = string("x_131_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")]; + tensor var_3558 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3558")]; + tensor var_3564 = exp(x = var_3558)[name = string("op_3564")]; + tensor var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor([-1])]; + bool var_3574_keep_dims_0 = const()[name = string("op_3574_keep_dims_0"), val = bool(true)]; + tensor var_3574 = reduce_sum(axes = var_3574_axes_0, keep_dims = 
var_3574_keep_dims_0, x = var_3564)[name = string("op_3574")]; + tensor var_3580_cast_fp16 = real_div(x = var_3564, y = var_3574)[name = string("op_3580_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_3580_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_3591 = const()[name = string("op_3591"), val = tensor([0, 2, 1, 3])]; + tensor var_3598 = const()[name = string("op_3598"), val = tensor([1, 1, -1])]; + tensor var_3592_cast_fp16 = transpose(perm = var_3591, x = attn_output_49_cast_fp16)[name = string("transpose_9")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_3598, x = var_3592_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_3603 = const()[name = string("op_3603"), val = tensor([0, 2, 1])]; + string var_3619_pad_type_0 = const()[name = string("op_3619_pad_type_0"), val = string("valid")]; + int32 var_3619_groups_0 = const()[name = string("op_3619_groups_0"), val = int32(1)]; + tensor var_3619_strides_0 = const()[name = string("op_3619_strides_0"), val = tensor([1])]; + tensor var_3619_pad_0 = const()[name = string("op_3619_pad_0"), val = tensor([0, 0])]; + tensor var_3619_dilations_0 = const()[name = string("op_3619_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412394176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417637120))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3604_cast_fp16 = transpose(perm = var_3603, x = 
attn_output_51_cast_fp16)[name = string("transpose_8")]; + tensor var_3619_cast_fp16 = conv(dilations = var_3619_dilations_0, groups = var_3619_groups_0, pad = var_3619_pad_0, pad_type = var_3619_pad_type_0, strides = var_3619_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3604_cast_fp16)[name = string("op_3619_cast_fp16")]; + tensor var_3623 = const()[name = string("op_3623"), val = tensor([0, 2, 1])]; + int32 var_3629 = const()[name = string("op_3629"), val = int32(-1)]; + fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_135_cast_fp16 = transpose(perm = var_3623, x = var_3619_cast_fp16)[name = string("transpose_7")]; + tensor var_3631_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3631_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_3629, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3631_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_3626_to_fp16 = const()[name = string("op_3626_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3626_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_3636_split_sizes_0 = const()[name = string("op_3636_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3636_axis_0 = const()[name = string("op_3636_axis_0"), val = int32(-1)]; + tensor var_3636_cast_fp16_0, tensor var_3636_cast_fp16_1 = split(axis = var_3636_axis_0, split_sizes = var_3636_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3636_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417639744)))]; + tensor attn_output_cast_fp16 = mul(x = var_3636_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_cast_fp16)[name = string("x_137_cast_fp16")]; + int32 var_3645 = const()[name = string("op_3645"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3647_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3647_cast_fp16")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203_cast_fp16 = concat(axis = var_3645, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3647_cast_fp16))[name = string("input_203_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_3642_to_fp16 = const()[name = string("op_3642_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3642_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor var_3652_split_sizes_0 = const()[name = string("op_3652_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3652_axis_0 = const()[name = string("op_3652_axis_0"), val = int32(-1)]; + tensor var_3652_cast_fp16_0, tensor var_3652_cast_fp16_1 = split(axis = var_3652_axis_0, split_sizes = var_3652_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3652_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(417644928)))]; + tensor h_51_cast_fp16 = mul(x = var_3652_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_3663 = const()[name = string("op_3663"), val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_3664 = transpose(perm = var_3663, x = h_51_cast_fp16)[name = string("transpose_6")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3664)[name = string("input_205")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), 
val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_33)[name = string("gate")]; + tensor input_207 = mul(x = gate, y = up)[name = string("input_207")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out")]; + tensor var_3704_axes_0 = const()[name = string("op_3704_axes_0"), val = tensor([2])]; + tensor var_3704 = squeeze(axes = var_3704_axes_0, x = mlp_out)[name = string("op_3704")]; + tensor var_3708 = const()[name = string("op_3708"), val = tensor([0, 2, 1])]; + int32 var_3714 = const()[name = string("op_3714"), val = int32(-1)]; + fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)]; + tensor x_139 = transpose(perm = var_3708, x = var_3704)[name = string("transpose_5")]; + tensor var_3716 = mul(x = x_139, y = const_69_promoted)[name = string("op_3716")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209 = concat(axis = var_3714, interleave = input_209_interleave_0, values = (x_139, var_3716))[name = string("input_209")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_3711_to_fp16 = const()[name = string("op_3711_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = 
normed_209_axes_0, epsilon = var_3711_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")]; + tensor var_3721_split_sizes_0 = const()[name = string("op_3721_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3721_axis_0 = const()[name = string("op_3721_axis_0"), val = int32(-1)]; + tensor var_3721_0, tensor var_3721_1 = split(axis = var_3721_axis_0, split_sizes = var_3721_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3721")]; + tensor hidden_states_83 = mul(x = var_3721_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 10496])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 1])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, true])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_3749 = const()[name = string("op_3749"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_3750 = transpose(perm = var_3749, x = hidden_states_85_cast_fp16)[name = string("transpose_4")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_3750)[name = string("input_211")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 
= const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_3769 = const()[name = string("op_3769"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_3770_cast_fp16 = transpose(perm = var_3769, x = per_layer_slice_cast_fp16)[name = string("transpose_3")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_3770_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_cast_fp16)[name = string("input_213_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417650112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417977856))))[name = 
string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_3786_axes_0 = const()[name = string("op_3786_axes_0"), val = tensor([2])]; + tensor var_3786_cast_fp16 = squeeze(axes = var_3786_axes_0, x = gated_cast_fp16)[name = string("op_3786_cast_fp16")]; + tensor var_3790 = const()[name = string("op_3790"), val = tensor([0, 2, 1])]; + int32 var_3796 = const()[name = string("op_3796"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_141_cast_fp16 = transpose(perm = var_3790, x = var_3786_cast_fp16)[name = string("transpose_2")]; + tensor var_3798_cast_fp16 = mul(x = x_141_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3798_cast_fp16")]; + bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)]; + tensor input_215_cast_fp16 = concat(axis = var_3796, interleave = input_215_interleave_0, values = (x_141_cast_fp16, var_3798_cast_fp16))[name = string("input_215_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_3793_to_fp16 = const()[name = string("op_3793_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3793_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_3803_split_sizes_0 = const()[name = string("op_3803_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3803_axis_0 = const()[name = string("op_3803_axis_0"), val = int32(-1)]; + tensor var_3803_cast_fp16_0, tensor var_3803_cast_fp16_1 = split(axis = var_3803_axis_0, 
split_sizes = var_3803_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3803_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417980480)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_3803_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor([0x1.c8p-2])]; + tensor x_cast_fp16 = mul(x = hidden_states_cast_fp16, y = const_71_promoted_to_fp16)[name = string("x_cast_fp16")]; + int32 var_3818 = const()[name = string("op_3818"), val = int32(-1)]; + fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3820_cast_fp16 = mul(x = x_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_3820_cast_fp16")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217_cast_fp16 = concat(axis = var_3818, interleave = input_217_interleave_0, values = (x_cast_fp16, var_3820_cast_fp16))[name = string("input_217_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_3815_to_fp16 = const()[name = string("op_3815_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_3815_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor var_3825_split_sizes_0 = const()[name = string("op_3825_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3825_axis_0 = const()[name = 
string("op_3825_axis_0"), val = int32(-1)]; + tensor var_3825_cast_fp16_0, tensor var_3825_cast_fp16_1 = split(axis = var_3825_axis_0, split_sizes = var_3825_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_3825_cast_fp16")]; + tensor norm_weight_promoted_to_fp16 = const()[name = string("norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417985664)))]; + tensor hidden_states_out = mul(x = var_3825_cast_fp16_0, y = norm_weight_promoted_to_fp16)[name = string("normed_221_cast_fp16")]; + tensor var_3836 = const()[name = string("op_3836"), val = tensor([0, 2, 1])]; + tensor squeeze_9_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417990848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(753535232))))[name = string("squeeze_9_palettized")]; + string var_3852_pad_type_0 = const()[name = string("op_3852_pad_type_0"), val = string("valid")]; + int32 var_3852_groups_0 = const()[name = string("op_3852_groups_0"), val = int32(1)]; + tensor var_3852_strides_0 = const()[name = string("op_3852_strides_0"), val = tensor([1])]; + tensor var_3852_pad_0 = const()[name = string("op_3852_pad_0"), val = tensor([0, 0])]; + tensor var_3852_dilations_0 = const()[name = string("op_3852_dilations_0"), val = tensor([1])]; + tensor var_3837 = transpose(perm = var_3836, x = hidden_states_out)[name = string("transpose_1")]; + tensor var_3852 = conv(dilations = var_3852_dilations_0, groups = var_3852_groups_0, pad = var_3852_pad_0, pad_type = var_3852_pad_type_0, strides = var_3852_strides_0, weight = squeeze_9_palettized, x = var_3837)[name = string("op_3852")]; + tensor var_3856 = const()[name = string("op_3856"), val = tensor([0, 2, 1])]; + fp16 _inversed_3859_y_0_to_fp16 = const()[name = string("_inversed_3859_y_0_to_fp16"), val = fp16(0x1.11p-5)]; + tensor logits_1 = transpose(perm = var_3856, 
x = var_3852)[name = string("transpose_0")]; + tensor _inversed_3859_cast_fp16 = mul(x = logits_1, y = _inversed_3859_y_0_to_fp16)[name = string("_inversed_3859_cast_fp16")]; + tensor var_3860_cast_fp16 = tanh(x = _inversed_3859_cast_fp16)[name = string("op_3860_cast_fp16")]; + fp16 var_3861_to_fp16 = const()[name = string("op_3861_to_fp16"), val = fp16(0x1.ep+4)]; + tensor logits_3_cast_fp16 = mul(x = var_3860_cast_fp16, y = var_3861_to_fp16)[name = string("logits_3_cast_fp16")]; + tensor logits_axes_0 = const()[name = string("logits_axes_0"), val = tensor([0])]; + tensor logits_cast_fp16 = squeeze(axes = logits_axes_0, x = logits_3_cast_fp16)[name = string("logits_cast_fp16")]; + int32 var_3866 = const()[name = string("op_3866"), val = int32(-1)]; + int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; + bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; + string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; + tensor token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_cast_fp16)[name = string("token_id_cast_fp16")]; + tensor var_3868_axes_0 = const()[name = string("op_3868_axes_0"), val = tensor([-1])]; + tensor var_3868 = expand_dims(axes = var_3868_axes_0, x = token_id)[name = string("op_3868")]; + bool var_3869_validate_indices_0 = const()[name = string("op_3869_validate_indices_0"), val = bool(false)]; + tensor var_3869_cast_fp16 = gather_along_axis(axis = var_3866, indices = var_3868, validate_indices = var_3869_validate_indices_0, x = logits_cast_fp16)[name = string("op_3869_cast_fp16")]; + tensor var_3870_axes_0 = const()[name = string("op_3870_axes_0"), val = tensor([-1])]; + tensor token_logit = squeeze(axes = var_3870_axes_0, x = var_3869_cast_fp16)[name = string("op_3870_cast_fp16")]; + tensor update_mask_tmp = identity(x = update_mask)[name = 
string("update_mask_tmp")]; + } -> (token_id, token_logit, hidden_states_out); +} \ No newline at end of file diff --git a/chunk3_3way.mlmodelc/weights/weight.bin b/chunk3_3way.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..15bc928e2d7a003f03c909e6e1e2c768f065d6d5 --- /dev/null +++ b/chunk3_3way.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155bcb0a818cb9f95184346c2cc319980d33f6acf5ec4b14fec14abc61888cd9 +size 753797440 diff --git a/chunk4.mlmodelc/analytics/coremldata.bin b/chunk4.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc04edd91e2bc058db3b238437f022a6655eb9e1 --- /dev/null +++ b/chunk4.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58c1fb89f6c05774b2ac875839fcc1e5c153cc195cfe223ad9ffb42d2d30ea48 +size 243 diff --git a/chunk4.mlmodelc/coremldata.bin b/chunk4.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..78e37ebded6a28bd7beceb068c14f45fcb21382d --- /dev/null +++ b/chunk4.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78c0ef3198782c4e4060bdf45f682dcb566666e05fee15858a5c2467c05965b +size 1014 diff --git a/chunk4.mlmodelc/model.mil b/chunk4.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..9a947bdc7fbff8084eaac9cb943c51f9c8c3b663 --- /dev/null +++ b/chunk4.mlmodelc/model.mil @@ -0,0 +1,3946 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func decode_q1(tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor kv13_k, tensor kv13_v, tensor kv14_k, tensor kv14_v, tensor per_layer_combined, tensor sin_f, tensor sin_s, tensor update_mask) { + tensor layers_0_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15731520))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28849088))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28859392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41966656))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41969280)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41974464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(42302208))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44924032))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44926144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58033408))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58043712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71150976))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84268544))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84271168)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84276352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604096))))[name = 
string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89847360))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89851520)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89852608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102959872))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102970176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116077440))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116087744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129195008))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129197632)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(129202816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129530560))))[name = string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129530880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132152384))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132154496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145261760))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145272064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158379328))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158389632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171496896))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171499520)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(171504704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171832448))))[name = string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174454272))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174456384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187563648))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187573952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200681216))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200691520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213798784))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213801408)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213806592))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(214134336))))[name = string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214134656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216756160))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216758272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229865536))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229875840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242983104))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242993408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256100672))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256103296)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256108480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(256436224))))[name = string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259058048))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259060160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272167424))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272177728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285284992))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298402560))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298405184)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738112))))[name = 
string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301359936))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314469312))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314479616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327586880))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327597184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340704448))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340707072)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340712256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040000))))[name = 
string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346283264))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346287424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359394688))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359404992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372512256))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372522560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385629824))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385632448)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385637632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385965376))))[name = 
string("layers_8_per_layer_input_gate_weight_palettized")]; + int32 var_452 = const()[name = string("op_452"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_454_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_454_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_452, interleave = input_1_interleave_0, values = (hidden_states, var_454_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_449_to_fp16 = const()[name = string("op_449_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_449_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_459_split_sizes_0 = const()[name = string("op_459_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_459_axis_0 = const()[name = string("op_459_axis_0"), val = int32(-1)]; + tensor var_459_cast_fp16_0, tensor var_459_cast_fp16_1 = split(axis = var_459_axis_0, split_sizes = var_459_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_459_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385965696)))]; + tensor h_1_cast_fp16 = mul(x = var_459_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_465 = const()[name = string("op_465"), val = tensor([0, 2, 1])]; + tensor var_468_axes_0 = const()[name = string("op_468_axes_0"), val = tensor([2])]; + tensor var_466_cast_fp16 = transpose(perm = var_465, x = h_1_cast_fp16)[name = string("transpose_103")]; + tensor 
var_468_cast_fp16 = expand_dims(axes = var_468_axes_0, x = var_466_cast_fp16)[name = string("op_468_cast_fp16")]; + string var_484_pad_type_0 = const()[name = string("op_484_pad_type_0"), val = string("valid")]; + tensor var_484_strides_0 = const()[name = string("op_484_strides_0"), val = tensor([1, 1])]; + tensor var_484_pad_0 = const()[name = string("op_484_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_484_dilations_0 = const()[name = string("op_484_dilations_0"), val = tensor([1, 1])]; + int32 var_484_groups_0 = const()[name = string("op_484_groups_0"), val = int32(1)]; + tensor var_484 = conv(dilations = var_484_dilations_0, groups = var_484_groups_0, pad = var_484_pad_0, pad_type = var_484_pad_type_0, strides = var_484_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_468_cast_fp16)[name = string("op_484")]; + tensor var_489 = const()[name = string("op_489"), val = tensor([1, 8, 256, 1])]; + tensor var_490 = reshape(shape = var_489, x = var_484)[name = string("op_490")]; + tensor var_495 = const()[name = string("op_495"), val = tensor([0, 1, 3, 2])]; + tensor var_505 = const()[name = string("op_505"), val = tensor([1, 8, 256])]; + tensor var_496 = transpose(perm = var_495, x = var_490)[name = string("transpose_102")]; + tensor x_1 = reshape(shape = var_505, x = var_496)[name = string("x_1")]; + int32 var_511 = const()[name = string("op_511"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_513 = mul(x = x_1, y = const_1_promoted)[name = string("op_513")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_511, interleave = input_5_interleave_0, values = (x_1, var_513))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = fp16(0x1.1p-20)]; + 
tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_508_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_518_split_sizes_0 = const()[name = string("op_518_split_sizes_0"), val = tensor([256, 256])]; + int32 var_518_axis_0 = const()[name = string("op_518_axis_0"), val = int32(-1)]; + tensor var_518_0, tensor var_518_1 = split(axis = var_518_axis_0, split_sizes = var_518_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_518")]; + tensor var_520 = mul(x = var_518_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_520")]; + tensor var_525 = const()[name = string("op_525"), val = tensor([1, 8, 1, 256])]; + tensor q_3 = reshape(shape = var_525, x = var_520)[name = string("q_3")]; + tensor var_527_cast_fp16 = mul(x = q_3, y = cos_s)[name = string("op_527_cast_fp16")]; + tensor var_528_split_sizes_0 = const()[name = string("op_528_split_sizes_0"), val = tensor([128, 128])]; + int32 var_528_axis_0 = const()[name = string("op_528_axis_0"), val = int32(-1)]; + tensor var_528_0, tensor var_528_1 = split(axis = var_528_axis_0, split_sizes = var_528_split_sizes_0, x = q_3)[name = string("op_528")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_530 = mul(x = var_528_1, y = const_2_promoted)[name = string("op_530")]; + int32 var_532 = const()[name = string("op_532"), val = int32(-1)]; + bool var_533_interleave_0 = const()[name = string("op_533_interleave_0"), val = bool(false)]; + tensor var_533 = concat(axis = var_532, interleave = var_533_interleave_0, values = (var_530, var_528_0))[name = string("op_533")]; + tensor var_534_cast_fp16 = mul(x = var_533, y = sin_s)[name = string("op_534_cast_fp16")]; + tensor q_5_cast_fp16 = add(x = var_527_cast_fp16, y = var_534_cast_fp16)[name = string("q_5_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = 
string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_101")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_100")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_99")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = 
string("transpose_98")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_36_cast_fp16 = transpose(perm = transpose_36_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_97")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_5_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_3_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")]; + tensor var_566 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_566")]; + tensor var_572 = exp(x = var_566)[name = string("op_572")]; + tensor var_582_axes_0 = const()[name = string("op_582_axes_0"), val = tensor([-1])]; + bool var_582_keep_dims_0 = const()[name = string("op_582_keep_dims_0"), val = bool(true)]; + tensor var_582 = reduce_sum(axes = var_582_axes_0, keep_dims = var_582_keep_dims_0, x = var_572)[name = string("op_582")]; + tensor var_588_cast_fp16 = real_div(x = var_572, y = var_582)[name = string("op_588_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool 
attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_96")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_588_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([0, 2, 1, 3])]; + tensor var_606 = const()[name = string("op_606"), val = tensor([1, 1, -1])]; + tensor var_600_cast_fp16 = transpose(perm = var_599, x = attn_output_1_cast_fp16)[name = string("transpose_95")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_606, x = var_600_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_611 = const()[name = string("op_611"), val = tensor([0, 2, 1])]; + string var_627_pad_type_0 = const()[name = string("op_627_pad_type_0"), val = string("valid")]; + int32 var_627_groups_0 = const()[name = string("op_627_groups_0"), val = int32(1)]; + tensor var_627_strides_0 = const()[name = string("op_627_strides_0"), val = tensor([1])]; + tensor var_627_pad_0 = const()[name = string("op_627_pad_0"), val = tensor([0, 0])]; + tensor var_627_dilations_0 = const()[name = string("op_627_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385970880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388592384))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_612_cast_fp16 = transpose(perm = var_611, x = attn_output_3_cast_fp16)[name = string("transpose_94")]; + tensor var_627_cast_fp16 = conv(dilations = var_627_dilations_0, groups = var_627_groups_0, pad = var_627_pad_0, pad_type = 
var_627_pad_type_0, strides = var_627_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_612_cast_fp16)[name = string("op_627_cast_fp16")]; + tensor var_631 = const()[name = string("op_631"), val = tensor([0, 2, 1])]; + int32 var_637 = const()[name = string("op_637"), val = int32(-1)]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_7_cast_fp16 = transpose(perm = var_631, x = var_627_cast_fp16)[name = string("transpose_93")]; + tensor var_639_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_639_cast_fp16")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9_cast_fp16 = concat(axis = var_637, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_639_cast_fp16))[name = string("input_9_cast_fp16")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_634_to_fp16 = const()[name = string("op_634_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_634_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")]; + tensor var_644_split_sizes_0 = const()[name = string("op_644_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_644_axis_0 = const()[name = string("op_644_axis_0"), val = int32(-1)]; + tensor var_644_cast_fp16_0, tensor var_644_cast_fp16_1 = split(axis = var_644_axis_0, split_sizes = var_644_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_644_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388595008)))]; + tensor attn_output_5_cast_fp16 = mul(x = var_644_cast_fp16_0, y = 
layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")]; + int32 var_653 = const()[name = string("op_653"), val = int32(-1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_655_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_655_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_653, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_655_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_650_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_660_split_sizes_0 = const()[name = string("op_660_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_660_axis_0 = const()[name = string("op_660_axis_0"), val = int32(-1)]; + tensor var_660_cast_fp16_0, tensor var_660_cast_fp16_1 = split(axis = var_660_axis_0, split_sizes = var_660_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_660_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388600192)))]; + tensor h_3_cast_fp16 = mul(x = var_660_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_671 = const()[name = string("op_671"), val = tensor([0, 2, 1])]; + tensor 
input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_672 = transpose(perm = var_671, x = h_3_cast_fp16)[name = string("transpose_92")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_672)[name = string("input_13")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")]; + string mlp_out_1_pad_type_0 = const()[name = 
string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")]; + tensor var_712_axes_0 = const()[name = string("op_712_axes_0"), val = tensor([2])]; + tensor var_712 = squeeze(axes = var_712_axes_0, x = mlp_out_1)[name = string("op_712")]; + tensor var_716 = const()[name = string("op_716"), val = tensor([0, 2, 1])]; + int32 var_722 = const()[name = string("op_722"), val = int32(-1)]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor x_11 = transpose(perm = var_716, x = var_712)[name = string("transpose_91")]; + tensor var_724 = mul(x = x_11, y = const_5_promoted)[name = string("op_724")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17 = concat(axis = var_722, interleave = input_17_interleave_0, values = (x_11, var_724))[name = string("input_17")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_719_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")]; + tensor var_729_split_sizes_0 = const()[name = string("op_729_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_729_axis_0 = 
const()[name = string("op_729_axis_0"), val = int32(-1)]; + tensor var_729_0, tensor var_729_1 = split(axis = var_729_axis_0, split_sizes = var_729_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_729")]; + tensor hidden_states_3 = mul(x = var_729_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 8448])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 1, 8704])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_757 = const()[name = string("op_757"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_758 = transpose(perm = var_757, x = hidden_states_5_cast_fp16)[name = string("transpose_90")]; + tensor input_19 = expand_dims(axes = input_19_axes_0, x = var_758)[name = string("input_19")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = gated_1_dilations_0, groups = gated_1_groups_0, pad 
= gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_777 = const()[name = string("op_777"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_778_cast_fp16 = transpose(perm = var_777, x = per_layer_slice_1_cast_fp16)[name = string("transpose_89")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_778_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388605376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388933120))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = 
gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_794_axes_0 = const()[name = string("op_794_axes_0"), val = tensor([2])]; + tensor var_794_cast_fp16 = squeeze(axes = var_794_axes_0, x = gated_5_cast_fp16)[name = string("op_794_cast_fp16")]; + tensor var_798 = const()[name = string("op_798"), val = tensor([0, 2, 1])]; + int32 var_804 = const()[name = string("op_804"), val = int32(-1)]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_13_cast_fp16 = transpose(perm = var_798, x = var_794_cast_fp16)[name = string("transpose_88")]; + tensor var_806_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_806_cast_fp16")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23_cast_fp16 = concat(axis = var_804, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_806_cast_fp16))[name = string("input_23_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_801_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_811_split_sizes_0 = const()[name = string("op_811_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_811_axis_0 = const()[name = string("op_811_axis_0"), val = int32(-1)]; + tensor var_811_cast_fp16_0, tensor var_811_cast_fp16_1 = split(axis = var_811_axis_0, split_sizes = var_811_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_811_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388935744)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_811_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor([0x1.a6p-1])]; + tensor x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")]; + int32 var_826 = const()[name = string("op_826"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_828_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_828_cast_fp16")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25_cast_fp16 = concat(axis = var_826, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_828_cast_fp16))[name = string("input_25_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_823_to_fp16 = const()[name = string("op_823_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_823_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_833_split_sizes_0 = const()[name = string("op_833_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_833_axis_0 = const()[name = string("op_833_axis_0"), val = int32(-1)]; + tensor var_833_cast_fp16_0, tensor var_833_cast_fp16_1 = split(axis = var_833_axis_0, split_sizes = var_833_split_sizes_0, x = normed_25_cast_fp16)[name = string("op_833_cast_fp16")]; + tensor 
layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388940928)))]; + tensor h_7_cast_fp16 = mul(x = var_833_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_839 = const()[name = string("op_839"), val = tensor([0, 2, 1])]; + tensor var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor([2])]; + tensor var_840_cast_fp16 = transpose(perm = var_839, x = h_7_cast_fp16)[name = string("transpose_87")]; + tensor var_842_cast_fp16 = expand_dims(axes = var_842_axes_0, x = var_840_cast_fp16)[name = string("op_842_cast_fp16")]; + string var_858_pad_type_0 = const()[name = string("op_858_pad_type_0"), val = string("valid")]; + tensor var_858_strides_0 = const()[name = string("op_858_strides_0"), val = tensor([1, 1])]; + tensor var_858_pad_0 = const()[name = string("op_858_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_858_dilations_0 = const()[name = string("op_858_dilations_0"), val = tensor([1, 1])]; + int32 var_858_groups_0 = const()[name = string("op_858_groups_0"), val = int32(1)]; + tensor var_858 = conv(dilations = var_858_dilations_0, groups = var_858_groups_0, pad = var_858_pad_0, pad_type = var_858_pad_type_0, strides = var_858_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_842_cast_fp16)[name = string("op_858")]; + tensor var_863 = const()[name = string("op_863"), val = tensor([1, 8, 256, 1])]; + tensor var_864 = reshape(shape = var_863, x = var_858)[name = string("op_864")]; + tensor var_869 = const()[name = string("op_869"), val = tensor([0, 1, 3, 2])]; + tensor var_879 = const()[name = string("op_879"), val = tensor([1, 8, 256])]; + tensor var_870 = transpose(perm = var_869, x = var_864)[name = string("transpose_86")]; + tensor x_17 = reshape(shape = var_879, x = var_870)[name = string("x_17")]; + 
int32 var_885 = const()[name = string("op_885"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor var_887 = mul(x = x_17, y = const_9_promoted)[name = string("op_887")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29 = concat(axis = var_885, interleave = input_29_interleave_0, values = (x_17, var_887))[name = string("input_29")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_882_to_fp16 = const()[name = string("op_882_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_882_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")]; + tensor var_892_split_sizes_0 = const()[name = string("op_892_split_sizes_0"), val = tensor([256, 256])]; + int32 var_892_axis_0 = const()[name = string("op_892_axis_0"), val = int32(-1)]; + tensor var_892_0, tensor var_892_1 = split(axis = var_892_axis_0, split_sizes = var_892_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_892")]; + tensor var_894 = mul(x = var_892_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_894")]; + tensor var_899 = const()[name = string("op_899"), val = tensor([1, 8, 1, 256])]; + tensor q_9 = reshape(shape = var_899, x = var_894)[name = string("q_9")]; + tensor var_901_cast_fp16 = mul(x = q_9, y = cos_s)[name = string("op_901_cast_fp16")]; + tensor var_902_split_sizes_0 = const()[name = string("op_902_split_sizes_0"), val = tensor([128, 128])]; + int32 var_902_axis_0 = const()[name = string("op_902_axis_0"), val = int32(-1)]; + tensor var_902_0, tensor var_902_1 = split(axis = var_902_axis_0, split_sizes = var_902_split_sizes_0, x = q_9)[name = string("op_902")]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor var_904 = mul(x = var_902_1, y = const_10_promoted)[name = 
string("op_904")]; + int32 var_906 = const()[name = string("op_906"), val = int32(-1)]; + bool var_907_interleave_0 = const()[name = string("op_907_interleave_0"), val = bool(false)]; + tensor var_907 = concat(axis = var_906, interleave = var_907_interleave_0, values = (var_904, var_902_0))[name = string("op_907")]; + tensor var_908_cast_fp16 = mul(x = var_907, y = sin_s)[name = string("op_908_cast_fp16")]; + tensor q_11_cast_fp16 = add(x = var_901_cast_fp16, y = var_908_cast_fp16)[name = string("q_11_cast_fp16")]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_11_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")]; + tensor var_940 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_940")]; + tensor var_946 = exp(x = var_940)[name = string("op_946")]; + tensor var_956_axes_0 = const()[name = string("op_956_axes_0"), val = tensor([-1])]; + bool var_956_keep_dims_0 = const()[name = string("op_956_keep_dims_0"), val = bool(true)]; + tensor var_956 = reduce_sum(axes = var_956_axes_0, keep_dims = var_956_keep_dims_0, x = var_946)[name = string("op_956")]; + tensor var_962_cast_fp16 = real_div(x = var_946, y = var_956)[name = string("op_962_cast_fp16")]; + 
bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_962_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_973 = const()[name = string("op_973"), val = tensor([0, 2, 1, 3])]; + tensor var_980 = const()[name = string("op_980"), val = tensor([1, 1, -1])]; + tensor var_974_cast_fp16 = transpose(perm = var_973, x = attn_output_7_cast_fp16)[name = string("transpose_85")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_980, x = var_974_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_985 = const()[name = string("op_985"), val = tensor([0, 2, 1])]; + string var_1001_pad_type_0 = const()[name = string("op_1001_pad_type_0"), val = string("valid")]; + int32 var_1001_groups_0 = const()[name = string("op_1001_groups_0"), val = int32(1)]; + tensor var_1001_strides_0 = const()[name = string("op_1001_strides_0"), val = tensor([1])]; + tensor var_1001_pad_0 = const()[name = string("op_1001_pad_0"), val = tensor([0, 0])]; + tensor var_1001_dilations_0 = const()[name = string("op_1001_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388946112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391567616))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_986_cast_fp16 = transpose(perm = var_985, x = attn_output_9_cast_fp16)[name = string("transpose_84")]; + tensor var_1001_cast_fp16 = conv(dilations = var_1001_dilations_0, groups = var_1001_groups_0, pad = var_1001_pad_0, pad_type = 
var_1001_pad_type_0, strides = var_1001_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_986_cast_fp16)[name = string("op_1001_cast_fp16")]; + tensor var_1005 = const()[name = string("op_1005"), val = tensor([0, 2, 1])]; + int32 var_1011 = const()[name = string("op_1011"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_23_cast_fp16 = transpose(perm = var_1005, x = var_1001_cast_fp16)[name = string("transpose_83")]; + tensor var_1013_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1013_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_1011, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1013_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1008_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1018_split_sizes_0 = const()[name = string("op_1018_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1018_axis_0 = const()[name = string("op_1018_axis_0"), val = int32(-1)]; + tensor var_1018_cast_fp16_0, tensor var_1018_cast_fp16_1 = split(axis = var_1018_axis_0, split_sizes = var_1018_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1018_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391570240)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1018_cast_fp16_0, y = 
layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_1027 = const()[name = string("op_1027"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1029_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1029_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1027, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1029_cast_fp16))[name = string("input_35_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1024_to_fp16 = const()[name = string("op_1024_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1024_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor var_1034_split_sizes_0 = const()[name = string("op_1034_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1034_axis_0 = const()[name = string("op_1034_axis_0"), val = int32(-1)]; + tensor var_1034_cast_fp16_0, tensor var_1034_cast_fp16_1 = split(axis = var_1034_axis_0, split_sizes = var_1034_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1034_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391575424)))]; + tensor h_9_cast_fp16 = mul(x = var_1034_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1045 = const()[name = string("op_1045"), val = 
tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_1046 = transpose(perm = var_1045, x = h_9_cast_fp16)[name = string("transpose_82")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1046)[name = string("input_37")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")]; + string mlp_out_3_pad_type_0 = 
const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")]; + tensor var_1086_axes_0 = const()[name = string("op_1086_axes_0"), val = tensor([2])]; + tensor var_1086 = squeeze(axes = var_1086_axes_0, x = mlp_out_3)[name = string("op_1086")]; + tensor var_1090 = const()[name = string("op_1090"), val = tensor([0, 2, 1])]; + int32 var_1096 = const()[name = string("op_1096"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor x_27 = transpose(perm = var_1090, x = var_1086)[name = string("transpose_81")]; + tensor var_1098 = mul(x = x_27, y = const_13_promoted)[name = string("op_1098")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1096, interleave = input_41_interleave_0, values = (x_27, var_1098))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1093_to_fp16 = const()[name = string("op_1093_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1093_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1103_split_sizes_0 = const()[name = string("op_1103_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_1103_axis_0 = const()[name = string("op_1103_axis_0"), val = int32(-1)]; + tensor var_1103_0, tensor var_1103_1 = split(axis = var_1103_axis_0, split_sizes = var_1103_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1103")]; + tensor hidden_states_13 = mul(x = var_1103_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 8704])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 1, 8960])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1131 = const()[name = string("op_1131"), val = tensor([0, 2, 1])]; + tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; + tensor var_1132 = transpose(perm = var_1131, x = hidden_states_15_cast_fp16)[name = string("transpose_80")]; + tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1132)[name = string("input_43")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = 
conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1151 = const()[name = string("op_1151"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1152_cast_fp16 = transpose(perm = var_1151, x = per_layer_slice_3_cast_fp16)[name = string("transpose_79")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1152_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391580608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391908352))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups 
= gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1168_axes_0 = const()[name = string("op_1168_axes_0"), val = tensor([2])]; + tensor var_1168_cast_fp16 = squeeze(axes = var_1168_axes_0, x = gated_11_cast_fp16)[name = string("op_1168_cast_fp16")]; + tensor var_1172 = const()[name = string("op_1172"), val = tensor([0, 2, 1])]; + int32 var_1178 = const()[name = string("op_1178"), val = int32(-1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_29_cast_fp16 = transpose(perm = var_1172, x = var_1168_cast_fp16)[name = string("transpose_78")]; + tensor var_1180_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1180_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_1178, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1180_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1175_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1185_split_sizes_0 = const()[name = string("op_1185_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1185_axis_0 = const()[name = string("op_1185_axis_0"), val = int32(-1)]; + tensor var_1185_cast_fp16_0, tensor var_1185_cast_fp16_1 = split(axis = var_1185_axis_0, split_sizes = var_1185_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1185_cast_fp16")]; + tensor 
layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391910976)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1185_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor([0x1.acp-1])]; + tensor x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")]; + int32 var_1200 = const()[name = string("op_1200"), val = int32(-1)]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1202_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1202_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1200, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1202_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1197_to_fp16 = const()[name = string("op_1197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1197_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1207_split_sizes_0 = const()[name = string("op_1207_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1207_axis_0 = const()[name = string("op_1207_axis_0"), val = int32(-1)]; + tensor var_1207_cast_fp16_0, tensor var_1207_cast_fp16_1 = 
split(axis = var_1207_axis_0, split_sizes = var_1207_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1207_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391916160)))]; + tensor h_13_cast_fp16 = mul(x = var_1207_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1213 = const()[name = string("op_1213"), val = tensor([0, 2, 1])]; + tensor var_1216_axes_0 = const()[name = string("op_1216_axes_0"), val = tensor([2])]; + tensor var_1214_cast_fp16 = transpose(perm = var_1213, x = h_13_cast_fp16)[name = string("transpose_77")]; + tensor var_1216_cast_fp16 = expand_dims(axes = var_1216_axes_0, x = var_1214_cast_fp16)[name = string("op_1216_cast_fp16")]; + string var_1232_pad_type_0 = const()[name = string("op_1232_pad_type_0"), val = string("valid")]; + tensor var_1232_strides_0 = const()[name = string("op_1232_strides_0"), val = tensor([1, 1])]; + tensor var_1232_pad_0 = const()[name = string("op_1232_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1232_dilations_0 = const()[name = string("op_1232_dilations_0"), val = tensor([1, 1])]; + int32 var_1232_groups_0 = const()[name = string("op_1232_groups_0"), val = int32(1)]; + tensor var_1232 = conv(dilations = var_1232_dilations_0, groups = var_1232_groups_0, pad = var_1232_pad_0, pad_type = var_1232_pad_type_0, strides = var_1232_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1216_cast_fp16)[name = string("op_1232")]; + tensor var_1237 = const()[name = string("op_1237"), val = tensor([1, 8, 512, 1])]; + tensor var_1238 = reshape(shape = var_1237, x = var_1232)[name = string("op_1238")]; + tensor var_1243 = const()[name = string("op_1243"), val = tensor([0, 1, 3, 2])]; + tensor var_1253 = const()[name = string("op_1253"), val = tensor([1, 
8, 512])]; + tensor var_1244 = transpose(perm = var_1243, x = var_1238)[name = string("transpose_76")]; + tensor x_33 = reshape(shape = var_1253, x = var_1244)[name = string("x_33")]; + int32 var_1259 = const()[name = string("op_1259"), val = int32(-1)]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1261 = mul(x = x_33, y = const_17_promoted)[name = string("op_1261")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1259, interleave = input_53_interleave_0, values = (x_33, var_1261))[name = string("input_53")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1256_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")]; + tensor var_1266_split_sizes_0 = const()[name = string("op_1266_split_sizes_0"), val = tensor([512, 512])]; + int32 var_1266_axis_0 = const()[name = string("op_1266_axis_0"), val = int32(-1)]; + tensor var_1266_0, tensor var_1266_1 = split(axis = var_1266_axis_0, split_sizes = var_1266_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1266")]; + tensor var_1268 = mul(x = var_1266_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_1268")]; + tensor var_1273 = const()[name = string("op_1273"), val = tensor([1, 8, 1, 512])]; + tensor q_15 = reshape(shape = var_1273, x = var_1268)[name = string("q_15")]; + tensor var_1275_cast_fp16 = mul(x = q_15, y = cos_f)[name = string("op_1275_cast_fp16")]; + tensor var_1276_split_sizes_0 = const()[name = string("op_1276_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1276_axis_0 = const()[name = string("op_1276_axis_0"), val = int32(-1)]; + tensor var_1276_0, tensor var_1276_1 = split(axis = var_1276_axis_0, split_sizes = 
var_1276_split_sizes_0, x = q_15)[name = string("op_1276")]; + fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; + tensor var_1278 = mul(x = var_1276_1, y = const_18_promoted)[name = string("op_1278")]; + int32 var_1280 = const()[name = string("op_1280"), val = int32(-1)]; + bool var_1281_interleave_0 = const()[name = string("op_1281_interleave_0"), val = bool(false)]; + tensor var_1281 = concat(axis = var_1280, interleave = var_1281_interleave_0, values = (var_1278, var_1276_0))[name = string("op_1281")]; + tensor var_1282_cast_fp16 = mul(x = var_1281, y = sin_f)[name = string("op_1282_cast_fp16")]; + tensor q_17_cast_fp16 = add(x = var_1275_cast_fp16, y = var_1282_cast_fp16)[name = string("q_17_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = kv14_k)[name = string("transpose_75")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_74")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name 
= string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = kv14_v)[name = string("transpose_73")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_72")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_38_cast_fp16 = transpose(perm = transpose_38_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_71")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = q_17_cast_fp16, y = transpose_38_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_full)[name = string("x_35_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = 
const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")]; + tensor var_1314 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1314")]; + tensor var_1320 = exp(x = var_1314)[name = string("op_1320")]; + tensor var_1330_axes_0 = const()[name = string("op_1330_axes_0"), val = tensor([-1])]; + bool var_1330_keep_dims_0 = const()[name = string("op_1330_keep_dims_0"), val = bool(true)]; + tensor var_1330 = reduce_sum(axes = var_1330_axes_0, keep_dims = var_1330_keep_dims_0, x = var_1320)[name = string("op_1330")]; + tensor var_1336_cast_fp16 = real_div(x = var_1320, y = var_1330)[name = string("op_1336_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_70")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1336_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_1347 = const()[name = string("op_1347"), val = tensor([0, 2, 1, 3])]; + tensor var_1354 = const()[name = string("op_1354"), val = tensor([1, 1, -1])]; + tensor var_1348_cast_fp16 = transpose(perm = var_1347, x = attn_output_13_cast_fp16)[name = string("transpose_69")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_1354, x = var_1348_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_1359 = const()[name = string("op_1359"), val = tensor([0, 2, 1])]; + string var_1375_pad_type_0 = const()[name = string("op_1375_pad_type_0"), val = string("valid")]; + int32 var_1375_groups_0 = 
const()[name = string("op_1375_groups_0"), val = int32(1)]; + tensor var_1375_strides_0 = const()[name = string("op_1375_strides_0"), val = tensor([1])]; + tensor var_1375_pad_0 = const()[name = string("op_1375_pad_0"), val = tensor([0, 0])]; + tensor var_1375_dilations_0 = const()[name = string("op_1375_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391921344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397164288))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1360_cast_fp16 = transpose(perm = var_1359, x = attn_output_15_cast_fp16)[name = string("transpose_68")]; + tensor var_1375_cast_fp16 = conv(dilations = var_1375_dilations_0, groups = var_1375_groups_0, pad = var_1375_pad_0, pad_type = var_1375_pad_type_0, strides = var_1375_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1360_cast_fp16)[name = string("op_1375_cast_fp16")]; + tensor var_1379 = const()[name = string("op_1379"), val = tensor([0, 2, 1])]; + int32 var_1385 = const()[name = string("op_1385"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_39_cast_fp16 = transpose(perm = var_1379, x = var_1375_cast_fp16)[name = string("transpose_67")]; + tensor var_1387_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1387_cast_fp16")]; + bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57_cast_fp16 = concat(axis = var_1385, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1387_cast_fp16))[name = string("input_57_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1382_to_fp16 = 
const()[name = string("op_1382_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1382_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1392_split_sizes_0 = const()[name = string("op_1392_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1392_axis_0 = const()[name = string("op_1392_axis_0"), val = int32(-1)]; + tensor var_1392_cast_fp16_0, tensor var_1392_cast_fp16_1 = split(axis = var_1392_axis_0, split_sizes = var_1392_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1392_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397166912)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_1392_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")]; + int32 var_1401 = const()[name = string("op_1401"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1403_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1403_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_1401, interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1403_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1398_to_fp16 = const()[name = string("op_1398_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = 
var_1398_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_1408_split_sizes_0 = const()[name = string("op_1408_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1408_axis_0 = const()[name = string("op_1408_axis_0"), val = int32(-1)]; + tensor var_1408_cast_fp16_0, tensor var_1408_cast_fp16_1 = split(axis = var_1408_axis_0, split_sizes = var_1408_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1408_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397172096)))]; + tensor h_15_cast_fp16 = mul(x = var_1408_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_1419 = const()[name = string("op_1419"), val = tensor([0, 2, 1])]; + tensor input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor([2])]; + tensor var_1420 = transpose(perm = var_1419, x = h_15_cast_fp16)[name = string("transpose_66")]; + tensor input_61 = expand_dims(axes = input_61_axes_0, x = var_1420)[name = string("input_61")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")]; + string up_5_pad_type_0 = 
const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")]; + tensor var_1460_axes_0 = const()[name = string("op_1460_axes_0"), val = tensor([2])]; + tensor var_1460 = squeeze(axes = var_1460_axes_0, x = mlp_out_5)[name = string("op_1460")]; + tensor var_1464 = const()[name = string("op_1464"), val = tensor([0, 2, 1])]; + int32 var_1470 = const()[name = string("op_1470"), 
val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_43 = transpose(perm = var_1464, x = var_1460)[name = string("transpose_65")]; + tensor var_1472 = mul(x = x_43, y = const_21_promoted)[name = string("op_1472")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_1470, interleave = input_65_interleave_0, values = (x_43, var_1472))[name = string("input_65")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_1467_to_fp16 = const()[name = string("op_1467_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1467_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")]; + tensor var_1477_split_sizes_0 = const()[name = string("op_1477_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1477_axis_0 = const()[name = string("op_1477_axis_0"), val = int32(-1)]; + tensor var_1477_0, tensor var_1477_1 = split(axis = var_1477_axis_0, split_sizes = var_1477_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1477")]; + tensor hidden_states_23 = mul(x = var_1477_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 8960])]; + tensor per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 1, 9216])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = 
per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_1505 = const()[name = string("op_1505"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_1506 = transpose(perm = var_1505, x = hidden_states_25_cast_fp16)[name = string("transpose_64")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_1506)[name = string("input_67")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_1525 = const()[name = string("op_1525"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + tensor var_1526_cast_fp16 = transpose(perm = var_1525, x = per_layer_slice_5_cast_fp16)[name = string("transpose_63")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1526_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = 
string("input_69_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397177280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397505024))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_1542_axes_0 = const()[name = string("op_1542_axes_0"), val = tensor([2])]; + tensor var_1542_cast_fp16 = squeeze(axes = var_1542_axes_0, x = gated_17_cast_fp16)[name = string("op_1542_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([0, 2, 1])]; + int32 var_1552 = const()[name = string("op_1552"), val = int32(-1)]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_45_cast_fp16 = transpose(perm = var_1546, x = var_1542_cast_fp16)[name = string("transpose_62")]; + tensor var_1554_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1554_cast_fp16")]; + bool input_71_interleave_0 = const()[name = 
string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_1552, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1554_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_1549_to_fp16 = const()[name = string("op_1549_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1549_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_1559_split_sizes_0 = const()[name = string("op_1559_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1559_axis_0 = const()[name = string("op_1559_axis_0"), val = int32(-1)]; + tensor var_1559_cast_fp16_0, tensor var_1559_cast_fp16_1 = split(axis = var_1559_axis_0, split_sizes = var_1559_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1559_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397507648)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_1559_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = tensor([0x1.acp-1])]; + tensor x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")]; + int32 var_1574 = const()[name = string("op_1574"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1576_cast_fp16 = 
mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1576_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_1574, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1576_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_1571_to_fp16 = const()[name = string("op_1571_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1571_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_1581_split_sizes_0 = const()[name = string("op_1581_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1581_axis_0 = const()[name = string("op_1581_axis_0"), val = int32(-1)]; + tensor var_1581_cast_fp16_0, tensor var_1581_cast_fp16_1 = split(axis = var_1581_axis_0, split_sizes = var_1581_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1581_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397512832)))]; + tensor h_19_cast_fp16 = mul(x = var_1581_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_1587 = const()[name = string("op_1587"), val = tensor([0, 2, 1])]; + tensor var_1590_axes_0 = const()[name = string("op_1590_axes_0"), val = tensor([2])]; + tensor var_1588_cast_fp16 = transpose(perm = var_1587, x = h_19_cast_fp16)[name = string("transpose_61")]; + tensor var_1590_cast_fp16 = expand_dims(axes = var_1590_axes_0, x = var_1588_cast_fp16)[name = string("op_1590_cast_fp16")]; + string var_1606_pad_type_0 = const()[name = string("op_1606_pad_type_0"), val = string("valid")]; + tensor 
var_1606_strides_0 = const()[name = string("op_1606_strides_0"), val = tensor([1, 1])]; + tensor var_1606_pad_0 = const()[name = string("op_1606_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1606_dilations_0 = const()[name = string("op_1606_dilations_0"), val = tensor([1, 1])]; + int32 var_1606_groups_0 = const()[name = string("op_1606_groups_0"), val = int32(1)]; + tensor var_1606 = conv(dilations = var_1606_dilations_0, groups = var_1606_groups_0, pad = var_1606_pad_0, pad_type = var_1606_pad_type_0, strides = var_1606_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1590_cast_fp16)[name = string("op_1606")]; + tensor var_1611 = const()[name = string("op_1611"), val = tensor([1, 8, 256, 1])]; + tensor var_1612 = reshape(shape = var_1611, x = var_1606)[name = string("op_1612")]; + tensor var_1617 = const()[name = string("op_1617"), val = tensor([0, 1, 3, 2])]; + tensor var_1627 = const()[name = string("op_1627"), val = tensor([1, 8, 256])]; + tensor var_1618 = transpose(perm = var_1617, x = var_1612)[name = string("transpose_60")]; + tensor x_49 = reshape(shape = var_1627, x = var_1618)[name = string("x_49")]; + int32 var_1633 = const()[name = string("op_1633"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_1635 = mul(x = x_49, y = const_25_promoted)[name = string("op_1635")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_1633, interleave = input_77_interleave_0, values = (x_49, var_1635))[name = string("input_77")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_1630_to_fp16 = const()[name = string("op_1630_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1630_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")]; + tensor 
var_1640_split_sizes_0 = const()[name = string("op_1640_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1640_axis_0 = const()[name = string("op_1640_axis_0"), val = int32(-1)]; + tensor var_1640_0, tensor var_1640_1 = split(axis = var_1640_axis_0, split_sizes = var_1640_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1640")]; + tensor var_1642 = mul(x = var_1640_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_1642")]; + tensor var_1647 = const()[name = string("op_1647"), val = tensor([1, 8, 1, 256])]; + tensor q_21 = reshape(shape = var_1647, x = var_1642)[name = string("q_21")]; + tensor var_1649_cast_fp16 = mul(x = q_21, y = cos_s)[name = string("op_1649_cast_fp16")]; + tensor var_1650_split_sizes_0 = const()[name = string("op_1650_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1650_axis_0 = const()[name = string("op_1650_axis_0"), val = int32(-1)]; + tensor var_1650_0, tensor var_1650_1 = split(axis = var_1650_axis_0, split_sizes = var_1650_split_sizes_0, x = q_21)[name = string("op_1650")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_1652 = mul(x = var_1650_1, y = const_26_promoted)[name = string("op_1652")]; + int32 var_1654 = const()[name = string("op_1654"), val = int32(-1)]; + bool var_1655_interleave_0 = const()[name = string("op_1655_interleave_0"), val = bool(false)]; + tensor var_1655 = concat(axis = var_1654, interleave = var_1655_interleave_0, values = (var_1652, var_1650_0))[name = string("op_1655")]; + tensor var_1656_cast_fp16 = mul(x = var_1655, y = sin_s)[name = string("op_1656_cast_fp16")]; + tensor q_23_cast_fp16 = add(x = var_1649_cast_fp16, y = var_1656_cast_fp16)[name = string("q_23_cast_fp16")]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor 
attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_23_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")]; + tensor var_1688 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1688")]; + tensor var_1694 = exp(x = var_1688)[name = string("op_1694")]; + tensor var_1704_axes_0 = const()[name = string("op_1704_axes_0"), val = tensor([-1])]; + bool var_1704_keep_dims_0 = const()[name = string("op_1704_keep_dims_0"), val = bool(true)]; + tensor var_1704 = reduce_sum(axes = var_1704_axes_0, keep_dims = var_1704_keep_dims_0, x = var_1694)[name = string("op_1704")]; + tensor var_1710_cast_fp16 = real_div(x = var_1694, y = var_1704)[name = string("op_1710_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_1710_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_1721 = const()[name = string("op_1721"), val = tensor([0, 2, 1, 3])]; + tensor var_1728 = const()[name = string("op_1728"), val = tensor([1, 1, -1])]; + tensor var_1722_cast_fp16 = transpose(perm = var_1721, x = attn_output_19_cast_fp16)[name = 
string("transpose_59")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_1728, x = var_1722_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1733 = const()[name = string("op_1733"), val = tensor([0, 2, 1])]; + string var_1749_pad_type_0 = const()[name = string("op_1749_pad_type_0"), val = string("valid")]; + int32 var_1749_groups_0 = const()[name = string("op_1749_groups_0"), val = int32(1)]; + tensor var_1749_strides_0 = const()[name = string("op_1749_strides_0"), val = tensor([1])]; + tensor var_1749_pad_0 = const()[name = string("op_1749_pad_0"), val = tensor([0, 0])]; + tensor var_1749_dilations_0 = const()[name = string("op_1749_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397518016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400139520))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1734_cast_fp16 = transpose(perm = var_1733, x = attn_output_21_cast_fp16)[name = string("transpose_58")]; + tensor var_1749_cast_fp16 = conv(dilations = var_1749_dilations_0, groups = var_1749_groups_0, pad = var_1749_pad_0, pad_type = var_1749_pad_type_0, strides = var_1749_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1734_cast_fp16)[name = string("op_1749_cast_fp16")]; + tensor var_1753 = const()[name = string("op_1753"), val = tensor([0, 2, 1])]; + int32 var_1759 = const()[name = string("op_1759"), val = int32(-1)]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_55_cast_fp16 = transpose(perm = var_1753, x = var_1749_cast_fp16)[name = string("transpose_57")]; + tensor var_1761_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1761_cast_fp16")]; + bool input_81_interleave_0 = 
const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_1759, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1761_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1756_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_1766_split_sizes_0 = const()[name = string("op_1766_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1766_axis_0 = const()[name = string("op_1766_axis_0"), val = int32(-1)]; + tensor var_1766_cast_fp16_0, tensor var_1766_cast_fp16_1 = split(axis = var_1766_axis_0, split_sizes = var_1766_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1766_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400142144)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_1766_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")]; + int32 var_1775 = const()[name = string("op_1775"), val = int32(-1)]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1777_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1777_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_1775, interleave = 
input_83_interleave_0, values = (x_57_cast_fp16, var_1777_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_1772_to_fp16 = const()[name = string("op_1772_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1772_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_1782_split_sizes_0 = const()[name = string("op_1782_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1782_axis_0 = const()[name = string("op_1782_axis_0"), val = int32(-1)]; + tensor var_1782_cast_fp16_0, tensor var_1782_cast_fp16_1 = split(axis = var_1782_axis_0, split_sizes = var_1782_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1782_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400147328)))]; + tensor h_21_cast_fp16 = mul(x = var_1782_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_1793 = const()[name = string("op_1793"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_1794 = transpose(perm = var_1793, x = h_21_cast_fp16)[name = string("transpose_56")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_1794)[name = string("input_85")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 
1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = 
layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")]; + tensor var_1834_axes_0 = const()[name = string("op_1834_axes_0"), val = tensor([2])]; + tensor var_1834 = squeeze(axes = var_1834_axes_0, x = mlp_out_7)[name = string("op_1834")]; + tensor var_1838 = const()[name = string("op_1838"), val = tensor([0, 2, 1])]; + int32 var_1844 = const()[name = string("op_1844"), val = int32(-1)]; + fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor x_59 = transpose(perm = var_1838, x = var_1834)[name = string("transpose_55")]; + tensor var_1846 = mul(x = x_59, y = const_29_promoted)[name = string("op_1846")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89 = concat(axis = var_1844, interleave = input_89_interleave_0, values = (x_59, var_1846))[name = string("input_89")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_1841_to_fp16 = const()[name = string("op_1841_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1841_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")]; + tensor var_1851_split_sizes_0 = const()[name = string("op_1851_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1851_axis_0 = const()[name = string("op_1851_axis_0"), val = int32(-1)]; + tensor var_1851_0, tensor var_1851_1 = split(axis = var_1851_axis_0, split_sizes = var_1851_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1851")]; + tensor hidden_states_33 = mul(x = var_1851_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 9216])]; + tensor 
per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 1, 9472])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_1879 = const()[name = string("op_1879"), val = tensor([0, 2, 1])]; + tensor input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor([2])]; + tensor var_1880 = transpose(perm = var_1879, x = hidden_states_35_cast_fp16)[name = string("transpose_54")]; + tensor input_91 = expand_dims(axes = input_91_axes_0, x = var_1880)[name = string("input_91")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_1899 = const()[name = string("op_1899"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + 
tensor var_1900_cast_fp16 = transpose(perm = var_1899, x = per_layer_slice_7_cast_fp16)[name = string("transpose_53")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1900_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400152512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400480256))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_1916_axes_0 = const()[name = string("op_1916_axes_0"), val = tensor([2])]; + tensor var_1916_cast_fp16 = squeeze(axes = var_1916_axes_0, x = gated_23_cast_fp16)[name = string("op_1916_cast_fp16")]; + tensor var_1920 = const()[name = string("op_1920"), val = tensor([0, 2, 1])]; + int32 var_1926 = const()[name = string("op_1926"), val = int32(-1)]; + fp16 
const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_61_cast_fp16 = transpose(perm = var_1920, x = var_1916_cast_fp16)[name = string("transpose_52")]; + tensor var_1928_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1928_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95_cast_fp16 = concat(axis = var_1926, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1928_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_1923_to_fp16 = const()[name = string("op_1923_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1923_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor var_1933_split_sizes_0 = const()[name = string("op_1933_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1933_axis_0 = const()[name = string("op_1933_axis_0"), val = int32(-1)]; + tensor var_1933_cast_fp16_0, tensor var_1933_cast_fp16_1 = split(axis = var_1933_axis_0, split_sizes = var_1933_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1933_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400482880)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_1933_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_31_promoted_to_fp16 = const()[name = 
string("const_31_promoted_to_fp16"), val = tensor([0x1.b6p-1])]; + tensor x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")]; + int32 var_1948 = const()[name = string("op_1948"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1950_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_1950_cast_fp16")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97_cast_fp16 = concat(axis = var_1948, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_1950_cast_fp16))[name = string("input_97_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_1945_to_fp16 = const()[name = string("op_1945_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_1945_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_1955_split_sizes_0 = const()[name = string("op_1955_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1955_axis_0 = const()[name = string("op_1955_axis_0"), val = int32(-1)]; + tensor var_1955_cast_fp16_0, tensor var_1955_cast_fp16_1 = split(axis = var_1955_axis_0, split_sizes = var_1955_split_sizes_0, x = normed_97_cast_fp16)[name = string("op_1955_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400488064)))]; + tensor h_25_cast_fp16 = mul(x = var_1955_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_1961 = const()[name = string("op_1961"), val = tensor([0, 2, 1])]; + tensor var_1964_axes_0 = const()[name = 
string("op_1964_axes_0"), val = tensor([2])]; + tensor var_1962_cast_fp16 = transpose(perm = var_1961, x = h_25_cast_fp16)[name = string("transpose_51")]; + tensor var_1964_cast_fp16 = expand_dims(axes = var_1964_axes_0, x = var_1962_cast_fp16)[name = string("op_1964_cast_fp16")]; + string var_1980_pad_type_0 = const()[name = string("op_1980_pad_type_0"), val = string("valid")]; + tensor var_1980_strides_0 = const()[name = string("op_1980_strides_0"), val = tensor([1, 1])]; + tensor var_1980_pad_0 = const()[name = string("op_1980_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1980_dilations_0 = const()[name = string("op_1980_dilations_0"), val = tensor([1, 1])]; + int32 var_1980_groups_0 = const()[name = string("op_1980_groups_0"), val = int32(1)]; + tensor var_1980 = conv(dilations = var_1980_dilations_0, groups = var_1980_groups_0, pad = var_1980_pad_0, pad_type = var_1980_pad_type_0, strides = var_1980_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_1964_cast_fp16)[name = string("op_1980")]; + tensor var_1985 = const()[name = string("op_1985"), val = tensor([1, 8, 256, 1])]; + tensor var_1986 = reshape(shape = var_1985, x = var_1980)[name = string("op_1986")]; + tensor var_1991 = const()[name = string("op_1991"), val = tensor([0, 1, 3, 2])]; + tensor var_2001 = const()[name = string("op_2001"), val = tensor([1, 8, 256])]; + tensor var_1992 = transpose(perm = var_1991, x = var_1986)[name = string("transpose_50")]; + tensor x_65 = reshape(shape = var_2001, x = var_1992)[name = string("x_65")]; + int32 var_2007 = const()[name = string("op_2007"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor var_2009 = mul(x = x_65, y = const_33_promoted)[name = string("op_2009")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2007, interleave = input_101_interleave_0, values = (x_65, 
var_2009))[name = string("input_101")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2004_to_fp16 = const()[name = string("op_2004_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2004_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")]; + tensor var_2014_split_sizes_0 = const()[name = string("op_2014_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2014_axis_0 = const()[name = string("op_2014_axis_0"), val = int32(-1)]; + tensor var_2014_0, tensor var_2014_1 = split(axis = var_2014_axis_0, split_sizes = var_2014_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2014")]; + tensor var_2016 = mul(x = var_2014_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2016")]; + tensor var_2021 = const()[name = string("op_2021"), val = tensor([1, 8, 1, 256])]; + tensor q_27 = reshape(shape = var_2021, x = var_2016)[name = string("q_27")]; + tensor var_2023_cast_fp16 = mul(x = q_27, y = cos_s)[name = string("op_2023_cast_fp16")]; + tensor var_2024_split_sizes_0 = const()[name = string("op_2024_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2024_axis_0 = const()[name = string("op_2024_axis_0"), val = int32(-1)]; + tensor var_2024_0, tensor var_2024_1 = split(axis = var_2024_axis_0, split_sizes = var_2024_split_sizes_0, x = q_27)[name = string("op_2024")]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor var_2026 = mul(x = var_2024_1, y = const_34_promoted)[name = string("op_2026")]; + int32 var_2028 = const()[name = string("op_2028"), val = int32(-1)]; + bool var_2029_interleave_0 = const()[name = string("op_2029_interleave_0"), val = bool(false)]; + tensor var_2029 = concat(axis = var_2028, interleave = var_2029_interleave_0, values = (var_2026, var_2024_0))[name = string("op_2029")]; + tensor var_2030_cast_fp16 = mul(x = var_2029, y = sin_s)[name = 
string("op_2030_cast_fp16")]; + tensor q_29_cast_fp16 = add(x = var_2023_cast_fp16, y = var_2030_cast_fp16)[name = string("q_29_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_29_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")]; + tensor var_2062 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2062")]; + tensor var_2068 = exp(x = var_2062)[name = string("op_2068")]; + tensor var_2078_axes_0 = const()[name = string("op_2078_axes_0"), val = tensor([-1])]; + bool var_2078_keep_dims_0 = const()[name = string("op_2078_keep_dims_0"), val = bool(true)]; + tensor var_2078 = reduce_sum(axes = var_2078_axes_0, keep_dims = var_2078_keep_dims_0, x = var_2068)[name = string("op_2078")]; + tensor var_2084_cast_fp16 = real_div(x = var_2068, y = var_2078)[name = string("op_2084_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = 
attn_output_25_transpose_y_0, x = var_2084_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2095 = const()[name = string("op_2095"), val = tensor([0, 2, 1, 3])]; + tensor var_2102 = const()[name = string("op_2102"), val = tensor([1, 1, -1])]; + tensor var_2096_cast_fp16 = transpose(perm = var_2095, x = attn_output_25_cast_fp16)[name = string("transpose_49")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_2102, x = var_2096_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_2107 = const()[name = string("op_2107"), val = tensor([0, 2, 1])]; + string var_2123_pad_type_0 = const()[name = string("op_2123_pad_type_0"), val = string("valid")]; + int32 var_2123_groups_0 = const()[name = string("op_2123_groups_0"), val = int32(1)]; + tensor var_2123_strides_0 = const()[name = string("op_2123_strides_0"), val = tensor([1])]; + tensor var_2123_pad_0 = const()[name = string("op_2123_pad_0"), val = tensor([0, 0])]; + tensor var_2123_dilations_0 = const()[name = string("op_2123_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400493248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403114752))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2108_cast_fp16 = transpose(perm = var_2107, x = attn_output_27_cast_fp16)[name = string("transpose_48")]; + tensor var_2123_cast_fp16 = conv(dilations = var_2123_dilations_0, groups = var_2123_groups_0, pad = var_2123_pad_0, pad_type = var_2123_pad_type_0, strides = var_2123_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2108_cast_fp16)[name = string("op_2123_cast_fp16")]; + tensor var_2127 = const()[name = string("op_2127"), val = tensor([0, 2, 1])]; + int32 var_2133 = const()[name = string("op_2133"), val = 
int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2127, x = var_2123_cast_fp16)[name = string("transpose_47")]; + tensor var_2135_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2135_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_2133, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2135_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2130_to_fp16 = const()[name = string("op_2130_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2130_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor var_2140_split_sizes_0 = const()[name = string("op_2140_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2140_axis_0 = const()[name = string("op_2140_axis_0"), val = int32(-1)]; + tensor var_2140_cast_fp16_0, tensor var_2140_cast_fp16_1 = split(axis = var_2140_axis_0, split_sizes = var_2140_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2140_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403117376)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_2140_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2149 = const()[name = string("op_2149"), val = int32(-1)]; + fp16 
const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2151_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2151_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_2149, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2151_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2146_to_fp16 = const()[name = string("op_2146_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2146_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_2156_split_sizes_0 = const()[name = string("op_2156_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2156_axis_0 = const()[name = string("op_2156_axis_0"), val = int32(-1)]; + tensor var_2156_cast_fp16_0, tensor var_2156_cast_fp16_1 = split(axis = var_2156_axis_0, split_sizes = var_2156_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2156_cast_fp16")]; + tensor layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403122560)))]; + tensor h_27_cast_fp16 = mul(x = var_2156_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_2167 = const()[name = string("op_2167"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_2168 = transpose(perm = var_2167, x = h_27_cast_fp16)[name = string("transpose_46")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_2168)[name = 
string("input_109")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 
0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")]; + tensor var_2208_axes_0 = const()[name = string("op_2208_axes_0"), val = tensor([2])]; + tensor var_2208 = squeeze(axes = var_2208_axes_0, x = mlp_out_9)[name = string("op_2208")]; + tensor var_2212 = const()[name = string("op_2212"), val = tensor([0, 2, 1])]; + int32 var_2218 = const()[name = string("op_2218"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2212, x = var_2208)[name = string("transpose_45")]; + tensor var_2220 = mul(x = x_75, y = const_37_promoted)[name = string("op_2220")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_2218, interleave = input_113_interleave_0, values = (x_75, var_2220))[name = string("input_113")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_2215_to_fp16 = const()[name = string("op_2215_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2215_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")]; + tensor var_2225_split_sizes_0 = const()[name = string("op_2225_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2225_axis_0 = const()[name = string("op_2225_axis_0"), val = int32(-1)]; + tensor var_2225_0, tensor var_2225_1 = split(axis = var_2225_axis_0, split_sizes = var_2225_split_sizes_0, x = normed_113_cast_fp16)[name = 
string("op_2225")]; + tensor hidden_states_43 = mul(x = var_2225_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 9472])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 1, 9728])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_2253 = const()[name = string("op_2253"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_2254 = transpose(perm = var_2253, x = hidden_states_45_cast_fp16)[name = string("transpose_44")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_2254)[name = string("input_115")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = 
string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_2273 = const()[name = string("op_2273"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_2274_cast_fp16 = transpose(perm = var_2273, x = per_layer_slice_9_cast_fp16)[name = string("transpose_43")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2274_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403127744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403455488))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = 
string("gated_29_cast_fp16")]; + tensor var_2290_axes_0 = const()[name = string("op_2290_axes_0"), val = tensor([2])]; + tensor var_2290_cast_fp16 = squeeze(axes = var_2290_axes_0, x = gated_29_cast_fp16)[name = string("op_2290_cast_fp16")]; + tensor var_2294 = const()[name = string("op_2294"), val = tensor([0, 2, 1])]; + int32 var_2300 = const()[name = string("op_2300"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_2294, x = var_2290_cast_fp16)[name = string("transpose_42")]; + tensor var_2302_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2302_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_2300, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2302_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_2297_to_fp16 = const()[name = string("op_2297_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_2297_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_2307_split_sizes_0 = const()[name = string("op_2307_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2307_axis_0 = const()[name = string("op_2307_axis_0"), val = int32(-1)]; + tensor var_2307_cast_fp16_0, tensor var_2307_cast_fp16_1 = split(axis = var_2307_axis_0, split_sizes = var_2307_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2307_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(403458112)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_2307_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor([0x1.c6p-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_2322 = const()[name = string("op_2322"), val = int32(-1)]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2324_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2324_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_2322, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2324_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2319_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor var_2329_split_sizes_0 = const()[name = string("op_2329_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2329_axis_0 = const()[name = string("op_2329_axis_0"), val = int32(-1)]; + tensor var_2329_cast_fp16_0, tensor var_2329_cast_fp16_1 = split(axis = var_2329_axis_0, split_sizes = var_2329_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2329_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403463296)))]; + tensor h_31_cast_fp16 = mul(x = var_2329_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_2335 = const()[name = string("op_2335"), val = tensor([0, 2, 1])]; + tensor var_2338_axes_0 = const()[name = string("op_2338_axes_0"), val = tensor([2])]; + tensor var_2336_cast_fp16 = transpose(perm = var_2335, x = h_31_cast_fp16)[name = string("transpose_41")]; + tensor var_2338_cast_fp16 = expand_dims(axes = var_2338_axes_0, x = var_2336_cast_fp16)[name = string("op_2338_cast_fp16")]; + string var_2354_pad_type_0 = const()[name = string("op_2354_pad_type_0"), val = string("valid")]; + tensor var_2354_strides_0 = const()[name = string("op_2354_strides_0"), val = tensor([1, 1])]; + tensor var_2354_pad_0 = const()[name = string("op_2354_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2354_dilations_0 = const()[name = string("op_2354_dilations_0"), val = tensor([1, 1])]; + int32 var_2354_groups_0 = const()[name = string("op_2354_groups_0"), val = int32(1)]; + tensor var_2354 = conv(dilations = var_2354_dilations_0, groups = var_2354_groups_0, pad = var_2354_pad_0, pad_type = var_2354_pad_type_0, strides = var_2354_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2338_cast_fp16)[name = string("op_2354")]; + tensor var_2359 = const()[name = string("op_2359"), val = tensor([1, 8, 256, 1])]; + tensor var_2360 = reshape(shape = var_2359, x = var_2354)[name = string("op_2360")]; + tensor var_2365 = const()[name = string("op_2365"), val = tensor([0, 1, 3, 2])]; + tensor var_2375 = const()[name = string("op_2375"), val = tensor([1, 8, 256])]; + tensor var_2366 = transpose(perm = var_2365, x = var_2360)[name = string("transpose_40")]; + tensor x_81 = reshape(shape = var_2375, x = var_2366)[name = string("x_81")]; + int32 var_2381 = 
const()[name = string("op_2381"), val = int32(-1)]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2383 = mul(x = x_81, y = const_41_promoted)[name = string("op_2383")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_2381, interleave = input_125_interleave_0, values = (x_81, var_2383))[name = string("input_125")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_2378_to_fp16 = const()[name = string("op_2378_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2378_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")]; + tensor var_2388_split_sizes_0 = const()[name = string("op_2388_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2388_axis_0 = const()[name = string("op_2388_axis_0"), val = int32(-1)]; + tensor var_2388_0, tensor var_2388_1 = split(axis = var_2388_axis_0, split_sizes = var_2388_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2388")]; + tensor var_2390 = mul(x = var_2388_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2390")]; + tensor var_2395 = const()[name = string("op_2395"), val = tensor([1, 8, 1, 256])]; + tensor q_33 = reshape(shape = var_2395, x = var_2390)[name = string("q_33")]; + tensor var_2397_cast_fp16 = mul(x = q_33, y = cos_s)[name = string("op_2397_cast_fp16")]; + tensor var_2398_split_sizes_0 = const()[name = string("op_2398_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2398_axis_0 = const()[name = string("op_2398_axis_0"), val = int32(-1)]; + tensor var_2398_0, tensor var_2398_1 = split(axis = var_2398_axis_0, split_sizes = var_2398_split_sizes_0, x = q_33)[name = string("op_2398")]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2400 = mul(x = var_2398_1, y 
= const_42_promoted)[name = string("op_2400")]; + int32 var_2402 = const()[name = string("op_2402"), val = int32(-1)]; + bool var_2403_interleave_0 = const()[name = string("op_2403_interleave_0"), val = bool(false)]; + tensor var_2403 = concat(axis = var_2402, interleave = var_2403_interleave_0, values = (var_2400, var_2398_0))[name = string("op_2403")]; + tensor var_2404_cast_fp16 = mul(x = var_2403, y = sin_s)[name = string("op_2404_cast_fp16")]; + tensor q_35_cast_fp16 = add(x = var_2397_cast_fp16, y = var_2404_cast_fp16)[name = string("q_35_cast_fp16")]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_35_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_83_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask_sliding)[name = string("x_83_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")]; + tensor var_2436 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2436")]; + tensor var_2442 = exp(x = var_2436)[name = string("op_2442")]; + tensor var_2452_axes_0 = const()[name = string("op_2452_axes_0"), val = tensor([-1])]; + bool var_2452_keep_dims_0 = const()[name = string("op_2452_keep_dims_0"), val = bool(true)]; + tensor var_2452 = reduce_sum(axes = var_2452_axes_0, keep_dims = var_2452_keep_dims_0, x = var_2442)[name = string("op_2452")]; + tensor var_2458_cast_fp16 = 
real_div(x = var_2442, y = var_2452)[name = string("op_2458_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2458_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_2469 = const()[name = string("op_2469"), val = tensor([0, 2, 1, 3])]; + tensor var_2476 = const()[name = string("op_2476"), val = tensor([1, 1, -1])]; + tensor var_2470_cast_fp16 = transpose(perm = var_2469, x = attn_output_31_cast_fp16)[name = string("transpose_39")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_2476, x = var_2470_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_2481 = const()[name = string("op_2481"), val = tensor([0, 2, 1])]; + string var_2497_pad_type_0 = const()[name = string("op_2497_pad_type_0"), val = string("valid")]; + int32 var_2497_groups_0 = const()[name = string("op_2497_groups_0"), val = int32(1)]; + tensor var_2497_strides_0 = const()[name = string("op_2497_strides_0"), val = tensor([1])]; + tensor var_2497_pad_0 = const()[name = string("op_2497_pad_0"), val = tensor([0, 0])]; + tensor var_2497_dilations_0 = const()[name = string("op_2497_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403468480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406089984))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2482_cast_fp16 = transpose(perm = var_2481, x = attn_output_33_cast_fp16)[name = string("transpose_38")]; + tensor var_2497_cast_fp16 = 
conv(dilations = var_2497_dilations_0, groups = var_2497_groups_0, pad = var_2497_pad_0, pad_type = var_2497_pad_type_0, strides = var_2497_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2482_cast_fp16)[name = string("op_2497_cast_fp16")]; + tensor var_2501 = const()[name = string("op_2501"), val = tensor([0, 2, 1])]; + int32 var_2507 = const()[name = string("op_2507"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_87_cast_fp16 = transpose(perm = var_2501, x = var_2497_cast_fp16)[name = string("transpose_37")]; + tensor var_2509_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2509_cast_fp16")]; + bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129_cast_fp16 = concat(axis = var_2507, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2509_cast_fp16))[name = string("input_129_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_2504_to_fp16 = const()[name = string("op_2504_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2504_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_2514_split_sizes_0 = const()[name = string("op_2514_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2514_axis_0 = const()[name = string("op_2514_axis_0"), val = int32(-1)]; + tensor var_2514_cast_fp16_0, tensor var_2514_cast_fp16_1 = split(axis = var_2514_axis_0, split_sizes = var_2514_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2514_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(406092608)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_2514_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_2523 = const()[name = string("op_2523"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2525_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2525_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_2523, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2525_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_2520_to_fp16 = const()[name = string("op_2520_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2520_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_2530_split_sizes_0 = const()[name = string("op_2530_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2530_axis_0 = const()[name = string("op_2530_axis_0"), val = int32(-1)]; + tensor var_2530_cast_fp16_0, tensor var_2530_cast_fp16_1 = split(axis = var_2530_axis_0, split_sizes = var_2530_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2530_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406097792)))]; + tensor h_33_cast_fp16 = mul(x = var_2530_cast_fp16_0, y = 
layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_2541 = const()[name = string("op_2541"), val = tensor([0, 2, 1])]; + tensor input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor([2])]; + tensor var_2542 = transpose(perm = var_2541, x = h_33_cast_fp16)[name = string("transpose_36")]; + tensor input_133 = expand_dims(axes = input_133_axes_0, x = var_2542)[name = string("input_133")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")]; + tensor var_2582_axes_0 = const()[name = string("op_2582_axes_0"), val = tensor([2])]; + tensor var_2582 = squeeze(axes = var_2582_axes_0, x = mlp_out_11)[name = string("op_2582")]; + tensor var_2586 = const()[name = string("op_2586"), val = tensor([0, 2, 1])]; + int32 var_2592 = const()[name = string("op_2592"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_91 = transpose(perm = var_2586, x = var_2582)[name = string("transpose_35")]; + tensor var_2594 = mul(x = x_91, y = const_45_promoted)[name = string("op_2594")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137 = concat(axis = var_2592, interleave = input_137_interleave_0, values = (x_91, var_2594))[name = string("input_137")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_2589_to_fp16 = const()[name = string("op_2589_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2589_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")]; + tensor var_2599_split_sizes_0 = const()[name = string("op_2599_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2599_axis_0 = const()[name = string("op_2599_axis_0"), val = int32(-1)]; + tensor var_2599_0, tensor var_2599_1 = split(axis = var_2599_axis_0, split_sizes = var_2599_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2599")]; + tensor hidden_states_53 = mul(x = var_2599_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 9728])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 1, 9984])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_2627 = const()[name = string("op_2627"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_2628 = transpose(perm = var_2627, x = hidden_states_55_cast_fp16)[name = string("transpose_34")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_2628)[name = string("input_139")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor 
gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_2647 = const()[name = string("op_2647"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_2648_cast_fp16 = transpose(perm = var_2647, x = per_layer_slice_11_cast_fp16)[name = string("transpose_33")]; + tensor per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2648_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(406102976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406430720))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_2664_axes_0 = const()[name = string("op_2664_axes_0"), val = tensor([2])]; + tensor var_2664_cast_fp16 = squeeze(axes = var_2664_axes_0, x = gated_35_cast_fp16)[name = string("op_2664_cast_fp16")]; + tensor var_2668 = const()[name = string("op_2668"), val = tensor([0, 2, 1])]; + int32 var_2674 = const()[name = string("op_2674"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_93_cast_fp16 = transpose(perm = var_2668, x = var_2664_cast_fp16)[name = string("transpose_32")]; + tensor var_2676_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2676_cast_fp16")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143_cast_fp16 = concat(axis = var_2674, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2676_cast_fp16))[name = string("input_143_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_2671_to_fp16 = const()[name = string("op_2671_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2671_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_2681_split_sizes_0 = const()[name = string("op_2681_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_2681_axis_0 = const()[name = string("op_2681_axis_0"), val = int32(-1)]; + tensor var_2681_cast_fp16_0, tensor var_2681_cast_fp16_1 = split(axis = var_2681_axis_0, split_sizes = var_2681_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2681_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406433344)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_2681_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.c4p-1])]; + tensor x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")]; + int32 var_2696 = const()[name = string("op_2696"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2698_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2698_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_2696, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2698_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_2693_to_fp16 = const()[name = string("op_2693_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2693_to_fp16, x = 
input_145_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_2703_split_sizes_0 = const()[name = string("op_2703_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2703_axis_0 = const()[name = string("op_2703_axis_0"), val = int32(-1)]; + tensor var_2703_cast_fp16_0, tensor var_2703_cast_fp16_1 = split(axis = var_2703_axis_0, split_sizes = var_2703_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2703_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406438528)))]; + tensor h_37_cast_fp16 = mul(x = var_2703_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_2709 = const()[name = string("op_2709"), val = tensor([0, 2, 1])]; + tensor var_2712_axes_0 = const()[name = string("op_2712_axes_0"), val = tensor([2])]; + tensor var_2710_cast_fp16 = transpose(perm = var_2709, x = h_37_cast_fp16)[name = string("transpose_31")]; + tensor var_2712_cast_fp16 = expand_dims(axes = var_2712_axes_0, x = var_2710_cast_fp16)[name = string("op_2712_cast_fp16")]; + string var_2728_pad_type_0 = const()[name = string("op_2728_pad_type_0"), val = string("valid")]; + tensor var_2728_strides_0 = const()[name = string("op_2728_strides_0"), val = tensor([1, 1])]; + tensor var_2728_pad_0 = const()[name = string("op_2728_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2728_dilations_0 = const()[name = string("op_2728_dilations_0"), val = tensor([1, 1])]; + int32 var_2728_groups_0 = const()[name = string("op_2728_groups_0"), val = int32(1)]; + tensor var_2728 = conv(dilations = var_2728_dilations_0, groups = var_2728_groups_0, pad = var_2728_pad_0, pad_type = var_2728_pad_type_0, strides = var_2728_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2712_cast_fp16)[name = string("op_2728")]; + tensor 
var_2733 = const()[name = string("op_2733"), val = tensor([1, 8, 256, 1])]; + tensor var_2734 = reshape(shape = var_2733, x = var_2728)[name = string("op_2734")]; + tensor var_2739 = const()[name = string("op_2739"), val = tensor([0, 1, 3, 2])]; + tensor var_2749 = const()[name = string("op_2749"), val = tensor([1, 8, 256])]; + tensor var_2740 = transpose(perm = var_2739, x = var_2734)[name = string("transpose_30")]; + tensor x_97 = reshape(shape = var_2749, x = var_2740)[name = string("x_97")]; + int32 var_2755 = const()[name = string("op_2755"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_2757 = mul(x = x_97, y = const_49_promoted)[name = string("op_2757")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_2755, interleave = input_149_interleave_0, values = (x_97, var_2757))[name = string("input_149")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_2752_to_fp16 = const()[name = string("op_2752_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2752_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")]; + tensor var_2762_split_sizes_0 = const()[name = string("op_2762_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2762_axis_0 = const()[name = string("op_2762_axis_0"), val = int32(-1)]; + tensor var_2762_0, tensor var_2762_1 = split(axis = var_2762_axis_0, split_sizes = var_2762_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2762")]; + tensor var_2764 = mul(x = var_2762_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_2764")]; + tensor var_2769 = const()[name = string("op_2769"), val = tensor([1, 8, 1, 256])]; + tensor q_39 = reshape(shape = var_2769, x = var_2764)[name = string("q_39")]; + tensor var_2771_cast_fp16 = mul(x = q_39, y = 
cos_s)[name = string("op_2771_cast_fp16")]; + tensor var_2772_split_sizes_0 = const()[name = string("op_2772_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2772_axis_0 = const()[name = string("op_2772_axis_0"), val = int32(-1)]; + tensor var_2772_0, tensor var_2772_1 = split(axis = var_2772_axis_0, split_sizes = var_2772_split_sizes_0, x = q_39)[name = string("op_2772")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_2774 = mul(x = var_2772_1, y = const_50_promoted)[name = string("op_2774")]; + int32 var_2776 = const()[name = string("op_2776"), val = int32(-1)]; + bool var_2777_interleave_0 = const()[name = string("op_2777_interleave_0"), val = bool(false)]; + tensor var_2777 = concat(axis = var_2776, interleave = var_2777_interleave_0, values = (var_2774, var_2772_0))[name = string("op_2777")]; + tensor var_2778_cast_fp16 = mul(x = var_2777, y = sin_s)[name = string("op_2778_cast_fp16")]; + tensor q_41_cast_fp16 = add(x = var_2771_cast_fp16, y = var_2778_cast_fp16)[name = string("q_41_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_41_cast_fp16, y = transpose_36_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = 
string("reduce_max_6")]; + tensor var_2810 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2810")]; + tensor var_2816 = exp(x = var_2810)[name = string("op_2816")]; + tensor var_2826_axes_0 = const()[name = string("op_2826_axes_0"), val = tensor([-1])]; + bool var_2826_keep_dims_0 = const()[name = string("op_2826_keep_dims_0"), val = bool(true)]; + tensor var_2826 = reduce_sum(axes = var_2826_axes_0, keep_dims = var_2826_keep_dims_0, x = var_2816)[name = string("op_2826")]; + tensor var_2832_cast_fp16 = real_div(x = var_2816, y = var_2826)[name = string("op_2832_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_2832_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_2843 = const()[name = string("op_2843"), val = tensor([0, 2, 1, 3])]; + tensor var_2850 = const()[name = string("op_2850"), val = tensor([1, 1, -1])]; + tensor var_2844_cast_fp16 = transpose(perm = var_2843, x = attn_output_37_cast_fp16)[name = string("transpose_29")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_2850, x = var_2844_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_2855 = const()[name = string("op_2855"), val = tensor([0, 2, 1])]; + string var_2871_pad_type_0 = const()[name = string("op_2871_pad_type_0"), val = string("valid")]; + int32 var_2871_groups_0 = const()[name = string("op_2871_groups_0"), val = int32(1)]; + tensor var_2871_strides_0 = const()[name = string("op_2871_strides_0"), val = tensor([1])]; + tensor var_2871_pad_0 = const()[name = string("op_2871_pad_0"), val = tensor([0, 0])]; + tensor var_2871_dilations_0 = const()[name = string("op_2871_dilations_0"), val = 
tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406443712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409065216))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2856_cast_fp16 = transpose(perm = var_2855, x = attn_output_39_cast_fp16)[name = string("transpose_28")]; + tensor var_2871_cast_fp16 = conv(dilations = var_2871_dilations_0, groups = var_2871_groups_0, pad = var_2871_pad_0, pad_type = var_2871_pad_type_0, strides = var_2871_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2856_cast_fp16)[name = string("op_2871_cast_fp16")]; + tensor var_2875 = const()[name = string("op_2875"), val = tensor([0, 2, 1])]; + int32 var_2881 = const()[name = string("op_2881"), val = int32(-1)]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_103_cast_fp16 = transpose(perm = var_2875, x = var_2871_cast_fp16)[name = string("transpose_27")]; + tensor var_2883_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2883_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_2881, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2883_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_2878_to_fp16 = const()[name = string("op_2878_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_2878_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_2888_split_sizes_0 = const()[name = string("op_2888_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_2888_axis_0 = const()[name = string("op_2888_axis_0"), val = int32(-1)]; + tensor var_2888_cast_fp16_0, tensor var_2888_cast_fp16_1 = split(axis = var_2888_axis_0, split_sizes = var_2888_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2888_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409067840)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_2888_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")]; + int32 var_2897 = const()[name = string("op_2897"), val = int32(-1)]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2899_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2899_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_2897, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2899_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2894_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_2904_split_sizes_0 = const()[name = string("op_2904_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2904_axis_0 = const()[name = string("op_2904_axis_0"), val = int32(-1)]; + tensor 
var_2904_cast_fp16_0, tensor var_2904_cast_fp16_1 = split(axis = var_2904_axis_0, split_sizes = var_2904_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_2904_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409073024)))]; + tensor h_39_cast_fp16 = mul(x = var_2904_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_2915 = const()[name = string("op_2915"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_2916 = transpose(perm = var_2915, x = h_39_cast_fp16)[name = string("transpose_26")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_2916)[name = string("input_157")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")]; + tensor var_2956_axes_0 = const()[name = string("op_2956_axes_0"), val = tensor([2])]; + tensor var_2956 = squeeze(axes = var_2956_axes_0, x = mlp_out_13)[name = string("op_2956")]; + tensor var_2960 = const()[name = string("op_2960"), val = tensor([0, 2, 1])]; + int32 var_2966 = const()[name = string("op_2966"), val = int32(-1)]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor x_107 = transpose(perm = var_2960, x = var_2956)[name = string("transpose_25")]; + tensor 
var_2968 = mul(x = x_107, y = const_53_promoted)[name = string("op_2968")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_2966, interleave = input_161_interleave_0, values = (x_107, var_2968))[name = string("input_161")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_2963_to_fp16 = const()[name = string("op_2963_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_2963_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")]; + tensor var_2973_split_sizes_0 = const()[name = string("op_2973_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2973_axis_0 = const()[name = string("op_2973_axis_0"), val = int32(-1)]; + tensor var_2973_0, tensor var_2973_1 = split(axis = var_2973_axis_0, split_sizes = var_2973_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_2973")]; + tensor hidden_states_63 = mul(x = var_2973_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 9984])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 1, 10240])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_3001 = const()[name = string("op_3001"), val = tensor([0, 2, 1])]; + tensor input_163_axes_0 = 
const()[name = string("input_163_axes_0"), val = tensor([2])]; + tensor var_3002 = transpose(perm = var_3001, x = hidden_states_65_cast_fp16)[name = string("transpose_24")]; + tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_3002)[name = string("input_163")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_3021 = const()[name = string("op_3021"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_3022_cast_fp16 = transpose(perm = var_3021, x = per_layer_slice_13_cast_fp16)[name = string("transpose_23")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3022_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = 
string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409078208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409405952))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_3038_axes_0 = const()[name = string("op_3038_axes_0"), val = tensor([2])]; + tensor var_3038_cast_fp16 = squeeze(axes = var_3038_axes_0, x = gated_41_cast_fp16)[name = string("op_3038_cast_fp16")]; + tensor var_3042 = const()[name = string("op_3042"), val = tensor([0, 2, 1])]; + int32 var_3048 = const()[name = string("op_3048"), val = int32(-1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_109_cast_fp16 = transpose(perm = var_3042, x = var_3038_cast_fp16)[name = string("transpose_22")]; + tensor var_3050_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3050_cast_fp16")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_3048, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3050_cast_fp16))[name = 
string("input_167_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_3045_to_fp16 = const()[name = string("op_3045_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3045_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_3055_split_sizes_0 = const()[name = string("op_3055_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3055_axis_0 = const()[name = string("op_3055_axis_0"), val = int32(-1)]; + tensor var_3055_cast_fp16_0, tensor var_3055_cast_fp16_1 = split(axis = var_3055_axis_0, split_sizes = var_3055_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3055_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409408576)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_3055_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor([0x1.b6p-1])]; + tensor x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")]; + int32 var_3070 = const()[name = string("op_3070"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3072_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3072_cast_fp16")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = 
bool(false)]; + tensor input_169_cast_fp16 = concat(axis = var_3070, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3072_cast_fp16))[name = string("input_169_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_3067_to_fp16 = const()[name = string("op_3067_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3067_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_3077_split_sizes_0 = const()[name = string("op_3077_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3077_axis_0 = const()[name = string("op_3077_axis_0"), val = int32(-1)]; + tensor var_3077_cast_fp16_0, tensor var_3077_cast_fp16_1 = split(axis = var_3077_axis_0, split_sizes = var_3077_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3077_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409413760)))]; + tensor h_43_cast_fp16 = mul(x = var_3077_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_3083 = const()[name = string("op_3083"), val = tensor([0, 2, 1])]; + tensor var_3086_axes_0 = const()[name = string("op_3086_axes_0"), val = tensor([2])]; + tensor var_3084_cast_fp16 = transpose(perm = var_3083, x = h_43_cast_fp16)[name = string("transpose_21")]; + tensor var_3086_cast_fp16 = expand_dims(axes = var_3086_axes_0, x = var_3084_cast_fp16)[name = string("op_3086_cast_fp16")]; + string var_3102_pad_type_0 = const()[name = string("op_3102_pad_type_0"), val = string("valid")]; + tensor var_3102_strides_0 = const()[name = string("op_3102_strides_0"), val = tensor([1, 1])]; + tensor var_3102_pad_0 = const()[name = string("op_3102_pad_0"), val = tensor([0, 0, 0, 
0])]; + tensor var_3102_dilations_0 = const()[name = string("op_3102_dilations_0"), val = tensor([1, 1])]; + int32 var_3102_groups_0 = const()[name = string("op_3102_groups_0"), val = int32(1)]; + tensor var_3102 = conv(dilations = var_3102_dilations_0, groups = var_3102_groups_0, pad = var_3102_pad_0, pad_type = var_3102_pad_type_0, strides = var_3102_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3086_cast_fp16)[name = string("op_3102")]; + tensor var_3107 = const()[name = string("op_3107"), val = tensor([1, 8, 256, 1])]; + tensor var_3108 = reshape(shape = var_3107, x = var_3102)[name = string("op_3108")]; + tensor var_3113 = const()[name = string("op_3113"), val = tensor([0, 1, 3, 2])]; + tensor var_3123 = const()[name = string("op_3123"), val = tensor([1, 8, 256])]; + tensor var_3114 = transpose(perm = var_3113, x = var_3108)[name = string("transpose_20")]; + tensor x_113 = reshape(shape = var_3123, x = var_3114)[name = string("x_113")]; + int32 var_3129 = const()[name = string("op_3129"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor var_3131 = mul(x = x_113, y = const_57_promoted)[name = string("op_3131")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_3129, interleave = input_173_interleave_0, values = (x_113, var_3131))[name = string("input_173")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_3126_to_fp16 = const()[name = string("op_3126_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3126_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")]; + tensor var_3136_split_sizes_0 = const()[name = string("op_3136_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3136_axis_0 = const()[name = string("op_3136_axis_0"), val = 
int32(-1)]; + tensor var_3136_0, tensor var_3136_1 = split(axis = var_3136_axis_0, split_sizes = var_3136_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3136")]; + tensor var_3138 = mul(x = var_3136_0, y = layers_0_self_attn_q_norm_weight)[name = string("op_3138")]; + tensor var_3143 = const()[name = string("op_3143"), val = tensor([1, 8, 1, 256])]; + tensor q_45 = reshape(shape = var_3143, x = var_3138)[name = string("q_45")]; + tensor var_3145_cast_fp16 = mul(x = q_45, y = cos_s)[name = string("op_3145_cast_fp16")]; + tensor var_3146_split_sizes_0 = const()[name = string("op_3146_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3146_axis_0 = const()[name = string("op_3146_axis_0"), val = int32(-1)]; + tensor var_3146_0, tensor var_3146_1 = split(axis = var_3146_axis_0, split_sizes = var_3146_split_sizes_0, x = q_45)[name = string("op_3146")]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor var_3148 = mul(x = var_3146_1, y = const_58_promoted)[name = string("op_3148")]; + int32 var_3150 = const()[name = string("op_3150"), val = int32(-1)]; + bool var_3151_interleave_0 = const()[name = string("op_3151_interleave_0"), val = bool(false)]; + tensor var_3151 = concat(axis = var_3150, interleave = var_3151_interleave_0, values = (var_3148, var_3146_0))[name = string("op_3151")]; + tensor var_3152_cast_fp16 = mul(x = var_3151, y = sin_s)[name = string("op_3152_cast_fp16")]; + tensor q_47_cast_fp16 = add(x = var_3145_cast_fp16, y = var_3152_cast_fp16)[name = string("q_47_cast_fp16")]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_47_cast_fp16, y = transpose_36_cast_fp16)[name = 
string("attn_weights_29_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")]; + tensor var_3184 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3184")]; + tensor var_3190 = exp(x = var_3184)[name = string("op_3190")]; + tensor var_3200_axes_0 = const()[name = string("op_3200_axes_0"), val = tensor([-1])]; + bool var_3200_keep_dims_0 = const()[name = string("op_3200_keep_dims_0"), val = bool(true)]; + tensor var_3200 = reduce_sum(axes = var_3200_axes_0, keep_dims = var_3200_keep_dims_0, x = var_3190)[name = string("op_3200")]; + tensor var_3206_cast_fp16 = real_div(x = var_3190, y = var_3200)[name = string("op_3206_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3206_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_3217 = const()[name = string("op_3217"), val = tensor([0, 2, 1, 3])]; + tensor var_3224 = const()[name = string("op_3224"), val = tensor([1, 1, -1])]; + tensor var_3218_cast_fp16 = transpose(perm = var_3217, x = attn_output_43_cast_fp16)[name = string("transpose_19")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_3224, x = var_3218_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3229 = const()[name = 
string("op_3229"), val = tensor([0, 2, 1])]; + string var_3245_pad_type_0 = const()[name = string("op_3245_pad_type_0"), val = string("valid")]; + int32 var_3245_groups_0 = const()[name = string("op_3245_groups_0"), val = int32(1)]; + tensor var_3245_strides_0 = const()[name = string("op_3245_strides_0"), val = tensor([1])]; + tensor var_3245_pad_0 = const()[name = string("op_3245_pad_0"), val = tensor([0, 0])]; + tensor var_3245_dilations_0 = const()[name = string("op_3245_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409418944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412040448))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3230_cast_fp16 = transpose(perm = var_3229, x = attn_output_45_cast_fp16)[name = string("transpose_18")]; + tensor var_3245_cast_fp16 = conv(dilations = var_3245_dilations_0, groups = var_3245_groups_0, pad = var_3245_pad_0, pad_type = var_3245_pad_type_0, strides = var_3245_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3230_cast_fp16)[name = string("op_3245_cast_fp16")]; + tensor var_3249 = const()[name = string("op_3249"), val = tensor([0, 2, 1])]; + int32 var_3255 = const()[name = string("op_3255"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_119_cast_fp16 = transpose(perm = var_3249, x = var_3245_cast_fp16)[name = string("transpose_17")]; + tensor var_3257_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3257_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_3255, interleave = input_177_interleave_0, values = (x_119_cast_fp16, 
var_3257_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_3252_to_fp16 = const()[name = string("op_3252_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3252_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor var_3262_split_sizes_0 = const()[name = string("op_3262_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3262_axis_0 = const()[name = string("op_3262_axis_0"), val = int32(-1)]; + tensor var_3262_cast_fp16_0, tensor var_3262_cast_fp16_1 = split(axis = var_3262_axis_0, split_sizes = var_3262_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3262_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412043072)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_3262_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")]; + int32 var_3271 = const()[name = string("op_3271"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3273_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3273_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_3271, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3273_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), 
val = tensor([-1])]; + fp16 var_3268_to_fp16 = const()[name = string("op_3268_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3268_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_3278_split_sizes_0 = const()[name = string("op_3278_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3278_axis_0 = const()[name = string("op_3278_axis_0"), val = int32(-1)]; + tensor var_3278_cast_fp16_0, tensor var_3278_cast_fp16_1 = split(axis = var_3278_axis_0, split_sizes = var_3278_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412048256)))]; + tensor h_45_cast_fp16 = mul(x = var_3278_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_3289 = const()[name = string("op_3289"), val = tensor([0, 2, 1])]; + tensor input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor([2])]; + tensor var_3290 = transpose(perm = var_3289, x = h_45_cast_fp16)[name = string("transpose_16")]; + tensor input_181 = expand_dims(axes = input_181_axes_0, x = var_3290)[name = string("input_181")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = 
gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")]; + tensor var_3330_axes_0 = const()[name = 
string("op_3330_axes_0"), val = tensor([2])]; + tensor var_3330 = squeeze(axes = var_3330_axes_0, x = mlp_out_15)[name = string("op_3330")]; + tensor var_3334 = const()[name = string("op_3334"), val = tensor([0, 2, 1])]; + int32 var_3340 = const()[name = string("op_3340"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor x_123 = transpose(perm = var_3334, x = var_3330)[name = string("transpose_15")]; + tensor var_3342 = mul(x = x_123, y = const_61_promoted)[name = string("op_3342")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_3340, interleave = input_185_interleave_0, values = (x_123, var_3342))[name = string("input_185")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_3337_to_fp16 = const()[name = string("op_3337_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3337_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")]; + tensor var_3347_split_sizes_0 = const()[name = string("op_3347_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3347_axis_0 = const()[name = string("op_3347_axis_0"), val = int32(-1)]; + tensor var_3347_0, tensor var_3347_1 = split(axis = var_3347_axis_0, split_sizes = var_3347_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3347")]; + tensor hidden_states_73 = mul(x = var_3347_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 10240])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 1, 10496])]; + 
tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_3375 = const()[name = string("op_3375"), val = tensor([0, 2, 1])]; + tensor input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor([2])]; + tensor var_3376 = transpose(perm = var_3375, x = hidden_states_75_cast_fp16)[name = string("transpose_14")]; + tensor input_187 = expand_dims(axes = input_187_axes_0, x = var_3376)[name = string("input_187")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_3395 = const()[name = string("op_3395"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_3396_cast_fp16 = transpose(perm = var_3395, x = 
per_layer_slice_15_cast_fp16)[name = string("transpose_13")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3396_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412053440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412381184))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_3412_axes_0 = const()[name = string("op_3412_axes_0"), val = tensor([2])]; + tensor var_3412_cast_fp16 = squeeze(axes = var_3412_axes_0, x = gated_47_cast_fp16)[name = string("op_3412_cast_fp16")]; + tensor var_3416 = const()[name = string("op_3416"), val = tensor([0, 2, 1])]; + int32 var_3422 = const()[name = string("op_3422"), val = int32(-1)]; + fp16 const_62_promoted_to_fp16 = const()[name = 
string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_125_cast_fp16 = transpose(perm = var_3416, x = var_3412_cast_fp16)[name = string("transpose_12")]; + tensor var_3424_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3424_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_3422, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3424_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_3419_to_fp16 = const()[name = string("op_3419_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3419_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_3429_split_sizes_0 = const()[name = string("op_3429_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3429_axis_0 = const()[name = string("op_3429_axis_0"), val = int32(-1)]; + tensor var_3429_cast_fp16_0, tensor var_3429_cast_fp16_1 = split(axis = var_3429_axis_0, split_sizes = var_3429_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3429_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412383808)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_3429_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = 
tensor([0x1.9ep-1])]; + tensor x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")]; + int32 var_3444 = const()[name = string("op_3444"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3446_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3446_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_3444, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3446_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_3441_to_fp16 = const()[name = string("op_3441_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3441_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_3451_split_sizes_0 = const()[name = string("op_3451_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3451_axis_0 = const()[name = string("op_3451_axis_0"), val = int32(-1)]; + tensor var_3451_cast_fp16_0, tensor var_3451_cast_fp16_1 = split(axis = var_3451_axis_0, split_sizes = var_3451_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3451_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412388992)))]; + tensor h_49_cast_fp16 = mul(x = var_3451_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_3457 = const()[name = string("op_3457"), val = tensor([0, 2, 1])]; + tensor var_3460_axes_0 = const()[name = string("op_3460_axes_0"), 
val = tensor([2])]; + tensor var_3458_cast_fp16 = transpose(perm = var_3457, x = h_49_cast_fp16)[name = string("transpose_11")]; + tensor var_3460_cast_fp16 = expand_dims(axes = var_3460_axes_0, x = var_3458_cast_fp16)[name = string("op_3460_cast_fp16")]; + string var_3476_pad_type_0 = const()[name = string("op_3476_pad_type_0"), val = string("valid")]; + tensor var_3476_strides_0 = const()[name = string("op_3476_strides_0"), val = tensor([1, 1])]; + tensor var_3476_pad_0 = const()[name = string("op_3476_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3476_dilations_0 = const()[name = string("op_3476_dilations_0"), val = tensor([1, 1])]; + int32 var_3476_groups_0 = const()[name = string("op_3476_groups_0"), val = int32(1)]; + tensor var_3476 = conv(dilations = var_3476_dilations_0, groups = var_3476_groups_0, pad = var_3476_pad_0, pad_type = var_3476_pad_type_0, strides = var_3476_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3460_cast_fp16)[name = string("op_3476")]; + tensor var_3481 = const()[name = string("op_3481"), val = tensor([1, 8, 512, 1])]; + tensor var_3482 = reshape(shape = var_3481, x = var_3476)[name = string("op_3482")]; + tensor var_3487 = const()[name = string("op_3487"), val = tensor([0, 1, 3, 2])]; + tensor var_3497 = const()[name = string("op_3497"), val = tensor([1, 8, 512])]; + tensor var_3488 = transpose(perm = var_3487, x = var_3482)[name = string("transpose_10")]; + tensor x_129 = reshape(shape = var_3497, x = var_3488)[name = string("x_129")]; + int32 var_3503 = const()[name = string("op_3503"), val = int32(-1)]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_3505 = mul(x = x_129, y = const_65_promoted)[name = string("op_3505")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + tensor input_197 = concat(axis = var_3503, interleave = input_197_interleave_0, values = (x_129, var_3505))[name = 
string("input_197")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3500_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")]; + tensor var_3510_split_sizes_0 = const()[name = string("op_3510_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3510_axis_0 = const()[name = string("op_3510_axis_0"), val = int32(-1)]; + tensor var_3510_0, tensor var_3510_1 = split(axis = var_3510_axis_0, split_sizes = var_3510_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3510")]; + tensor var_3512 = mul(x = var_3510_0, y = layers_2_self_attn_q_norm_weight)[name = string("op_3512")]; + tensor var_3517 = const()[name = string("op_3517"), val = tensor([1, 8, 1, 512])]; + tensor q_51 = reshape(shape = var_3517, x = var_3512)[name = string("q_51")]; + tensor var_3519_cast_fp16 = mul(x = q_51, y = cos_f)[name = string("op_3519_cast_fp16")]; + tensor var_3520_split_sizes_0 = const()[name = string("op_3520_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3520_axis_0 = const()[name = string("op_3520_axis_0"), val = int32(-1)]; + tensor var_3520_0, tensor var_3520_1 = split(axis = var_3520_axis_0, split_sizes = var_3520_split_sizes_0, x = q_51)[name = string("op_3520")]; + fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)]; + tensor var_3522 = mul(x = var_3520_1, y = const_66_promoted)[name = string("op_3522")]; + int32 var_3524 = const()[name = string("op_3524"), val = int32(-1)]; + bool var_3525_interleave_0 = const()[name = string("op_3525_interleave_0"), val = bool(false)]; + tensor var_3525 = concat(axis = var_3524, interleave = var_3525_interleave_0, values = (var_3522, var_3520_0))[name = string("op_3525")]; + tensor var_3526_cast_fp16 = mul(x = var_3525, y = sin_f)[name = 
string("op_3526_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_3519_cast_fp16, y = var_3526_cast_fp16)[name = string("q_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_cast_fp16, y = transpose_38_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_full)[name = string("x_131_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")]; + tensor var_3558 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3558")]; + tensor var_3564 = exp(x = var_3558)[name = string("op_3564")]; + tensor var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor([-1])]; + bool var_3574_keep_dims_0 = const()[name = string("op_3574_keep_dims_0"), val = bool(true)]; + tensor var_3574 = reduce_sum(axes = var_3574_axes_0, keep_dims = var_3574_keep_dims_0, x = var_3564)[name = string("op_3574")]; + tensor var_3580_cast_fp16 = real_div(x = var_3564, y = var_3574)[name = string("op_3580_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = 
var_3580_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_3591 = const()[name = string("op_3591"), val = tensor([0, 2, 1, 3])]; + tensor var_3598 = const()[name = string("op_3598"), val = tensor([1, 1, -1])]; + tensor var_3592_cast_fp16 = transpose(perm = var_3591, x = attn_output_49_cast_fp16)[name = string("transpose_9")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_3598, x = var_3592_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_3603 = const()[name = string("op_3603"), val = tensor([0, 2, 1])]; + string var_3619_pad_type_0 = const()[name = string("op_3619_pad_type_0"), val = string("valid")]; + int32 var_3619_groups_0 = const()[name = string("op_3619_groups_0"), val = int32(1)]; + tensor var_3619_strides_0 = const()[name = string("op_3619_strides_0"), val = tensor([1])]; + tensor var_3619_pad_0 = const()[name = string("op_3619_pad_0"), val = tensor([0, 0])]; + tensor var_3619_dilations_0 = const()[name = string("op_3619_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412394176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417637120))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3604_cast_fp16 = transpose(perm = var_3603, x = attn_output_51_cast_fp16)[name = string("transpose_8")]; + tensor var_3619_cast_fp16 = conv(dilations = var_3619_dilations_0, groups = var_3619_groups_0, pad = var_3619_pad_0, pad_type = var_3619_pad_type_0, strides = var_3619_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3604_cast_fp16)[name = string("op_3619_cast_fp16")]; + tensor var_3623 = const()[name = string("op_3623"), val = tensor([0, 2, 1])]; + int32 var_3629 = const()[name = string("op_3629"), val = int32(-1)]; + fp16 
const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_135_cast_fp16 = transpose(perm = var_3623, x = var_3619_cast_fp16)[name = string("transpose_7")]; + tensor var_3631_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3631_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_3629, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3631_cast_fp16))[name = string("input_201_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_3626_to_fp16 = const()[name = string("op_3626_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3626_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_3636_split_sizes_0 = const()[name = string("op_3636_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3636_axis_0 = const()[name = string("op_3636_axis_0"), val = int32(-1)]; + tensor var_3636_cast_fp16_0, tensor var_3636_cast_fp16_1 = split(axis = var_3636_axis_0, split_sizes = var_3636_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3636_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417639744)))]; + tensor attn_output_cast_fp16 = mul(x = var_3636_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_cast_fp16)[name = string("x_137_cast_fp16")]; + int32 var_3645 = const()[name = string("op_3645"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = 
const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3647_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3647_cast_fp16")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203_cast_fp16 = concat(axis = var_3645, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3647_cast_fp16))[name = string("input_203_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_3642_to_fp16 = const()[name = string("op_3642_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3642_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor var_3652_split_sizes_0 = const()[name = string("op_3652_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3652_axis_0 = const()[name = string("op_3652_axis_0"), val = int32(-1)]; + tensor var_3652_cast_fp16_0, tensor var_3652_cast_fp16_1 = split(axis = var_3652_axis_0, split_sizes = var_3652_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3652_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417644928)))]; + tensor h_51_cast_fp16 = mul(x = var_3652_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_3663 = const()[name = string("op_3663"), val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_3664 = transpose(perm = var_3663, x = h_51_cast_fp16)[name = string("transpose_6")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3664)[name = string("input_205")]; + 
string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_33)[name = string("gate")]; + tensor input_207 = mul(x = gate, y = up)[name = string("input_207")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = 
tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out")]; + tensor var_3704_axes_0 = const()[name = string("op_3704_axes_0"), val = tensor([2])]; + tensor var_3704 = squeeze(axes = var_3704_axes_0, x = mlp_out)[name = string("op_3704")]; + tensor var_3708 = const()[name = string("op_3708"), val = tensor([0, 2, 1])]; + int32 var_3714 = const()[name = string("op_3714"), val = int32(-1)]; + fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)]; + tensor x_139 = transpose(perm = var_3708, x = var_3704)[name = string("transpose_5")]; + tensor var_3716 = mul(x = x_139, y = const_69_promoted)[name = string("op_3716")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209 = concat(axis = var_3714, interleave = input_209_interleave_0, values = (x_139, var_3716))[name = string("input_209")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_3711_to_fp16 = const()[name = string("op_3711_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3711_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")]; + tensor var_3721_split_sizes_0 = const()[name = string("op_3721_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3721_axis_0 = const()[name = string("op_3721_axis_0"), val = int32(-1)]; + tensor var_3721_0, tensor var_3721_1 = split(axis = var_3721_axis_0, split_sizes = var_3721_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3721")]; + tensor hidden_states_83 = mul(x = var_3721_0, y = layers_8_post_feedforward_layernorm_weight)[name = 
string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 10496])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 1, 1])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, true])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_3749 = const()[name = string("op_3749"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_3750 = transpose(perm = var_3749, x = hidden_states_85_cast_fp16)[name = string("transpose_4")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_3750)[name = string("input_211")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = 
gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_3769 = const()[name = string("op_3769"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_3770_cast_fp16 = transpose(perm = var_3769, x = per_layer_slice_cast_fp16)[name = string("transpose_3")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_3770_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_cast_fp16)[name = string("input_213_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417650112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417977856))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_3786_axes_0 = const()[name = string("op_3786_axes_0"), val = tensor([2])]; + tensor var_3786_cast_fp16 = squeeze(axes = var_3786_axes_0, x = gated_cast_fp16)[name = 
string("op_3786_cast_fp16")]; + tensor var_3790 = const()[name = string("op_3790"), val = tensor([0, 2, 1])]; + int32 var_3796 = const()[name = string("op_3796"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_141_cast_fp16 = transpose(perm = var_3790, x = var_3786_cast_fp16)[name = string("transpose_2")]; + tensor var_3798_cast_fp16 = mul(x = x_141_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3798_cast_fp16")]; + bool input_215_interleave_0 = const()[name = string("input_215_interleave_0"), val = bool(false)]; + tensor input_215_cast_fp16 = concat(axis = var_3796, interleave = input_215_interleave_0, values = (x_141_cast_fp16, var_3798_cast_fp16))[name = string("input_215_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_3793_to_fp16 = const()[name = string("op_3793_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3793_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_3803_split_sizes_0 = const()[name = string("op_3803_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3803_axis_0 = const()[name = string("op_3803_axis_0"), val = int32(-1)]; + tensor var_3803_cast_fp16_0, tensor var_3803_cast_fp16_1 = split(axis = var_3803_axis_0, split_sizes = var_3803_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3803_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417980480)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_3803_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor 
hidden_states_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor([0x1.c8p-2])]; + tensor x_cast_fp16 = mul(x = hidden_states_cast_fp16, y = const_71_promoted_to_fp16)[name = string("x_cast_fp16")]; + int32 var_3818 = const()[name = string("op_3818"), val = int32(-1)]; + fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3820_cast_fp16 = mul(x = x_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_3820_cast_fp16")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217_cast_fp16 = concat(axis = var_3818, interleave = input_217_interleave_0, values = (x_cast_fp16, var_3820_cast_fp16))[name = string("input_217_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_3815_to_fp16 = const()[name = string("op_3815_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_3815_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor var_3825_split_sizes_0 = const()[name = string("op_3825_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3825_axis_0 = const()[name = string("op_3825_axis_0"), val = int32(-1)]; + tensor var_3825_cast_fp16_0, tensor var_3825_cast_fp16_1 = split(axis = var_3825_axis_0, split_sizes = var_3825_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_3825_cast_fp16")]; + tensor norm_weight_promoted_to_fp16 = const()[name = string("norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417985664)))]; + tensor hidden_states_out = mul(x = var_3825_cast_fp16_0, y = norm_weight_promoted_to_fp16)[name = 
string("normed_221_cast_fp16")]; + tensor var_3836 = const()[name = string("op_3836"), val = tensor([0, 2, 1])]; + tensor squeeze_9_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417990848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(753535232))))[name = string("squeeze_9_palettized")]; + string var_3852_pad_type_0 = const()[name = string("op_3852_pad_type_0"), val = string("valid")]; + int32 var_3852_groups_0 = const()[name = string("op_3852_groups_0"), val = int32(1)]; + tensor var_3852_strides_0 = const()[name = string("op_3852_strides_0"), val = tensor([1])]; + tensor var_3852_pad_0 = const()[name = string("op_3852_pad_0"), val = tensor([0, 0])]; + tensor var_3852_dilations_0 = const()[name = string("op_3852_dilations_0"), val = tensor([1])]; + tensor var_3837 = transpose(perm = var_3836, x = hidden_states_out)[name = string("transpose_1")]; + tensor var_3852 = conv(dilations = var_3852_dilations_0, groups = var_3852_groups_0, pad = var_3852_pad_0, pad_type = var_3852_pad_type_0, strides = var_3852_strides_0, weight = squeeze_9_palettized, x = var_3837)[name = string("op_3852")]; + tensor var_3856 = const()[name = string("op_3856"), val = tensor([0, 2, 1])]; + fp16 _inversed_3859_y_0_to_fp16 = const()[name = string("_inversed_3859_y_0_to_fp16"), val = fp16(0x1.11p-5)]; + tensor logits_1 = transpose(perm = var_3856, x = var_3852)[name = string("transpose_0")]; + tensor _inversed_3859_cast_fp16 = mul(x = logits_1, y = _inversed_3859_y_0_to_fp16)[name = string("_inversed_3859_cast_fp16")]; + tensor var_3860_cast_fp16 = tanh(x = _inversed_3859_cast_fp16)[name = string("op_3860_cast_fp16")]; + fp16 var_3861_to_fp16 = const()[name = string("op_3861_to_fp16"), val = fp16(0x1.ep+4)]; + tensor logits_3_cast_fp16 = mul(x = var_3860_cast_fp16, y = var_3861_to_fp16)[name = string("logits_3_cast_fp16")]; + tensor logits_axes_0 = const()[name = 
string("logits_axes_0"), val = tensor([0])]; + tensor logits_cast_fp16 = squeeze(axes = logits_axes_0, x = logits_3_cast_fp16)[name = string("logits_cast_fp16")]; + int32 var_3866 = const()[name = string("op_3866"), val = int32(-1)]; + int32 token_id_axis_0 = const()[name = string("token_id_axis_0"), val = int32(-1)]; + bool token_id_keep_dims_0 = const()[name = string("token_id_keep_dims_0"), val = bool(false)]; + string token_id_output_dtype_0 = const()[name = string("token_id_output_dtype_0"), val = string("int32")]; + tensor token_id = reduce_argmax(axis = token_id_axis_0, keep_dims = token_id_keep_dims_0, output_dtype = token_id_output_dtype_0, x = logits_cast_fp16)[name = string("token_id_cast_fp16")]; + tensor var_3868_axes_0 = const()[name = string("op_3868_axes_0"), val = tensor([-1])]; + tensor var_3868 = expand_dims(axes = var_3868_axes_0, x = token_id)[name = string("op_3868")]; + bool var_3869_validate_indices_0 = const()[name = string("op_3869_validate_indices_0"), val = bool(false)]; + tensor var_3869_cast_fp16 = gather_along_axis(axis = var_3866, indices = var_3868, validate_indices = var_3869_validate_indices_0, x = logits_cast_fp16)[name = string("op_3869_cast_fp16")]; + tensor var_3870_axes_0 = const()[name = string("op_3870_axes_0"), val = tensor([-1])]; + tensor token_logit = squeeze(axes = var_3870_axes_0, x = var_3869_cast_fp16)[name = string("op_3870_cast_fp16")]; + tensor update_mask_tmp = identity(x = update_mask)[name = string("update_mask_tmp")]; + } -> (token_id, token_logit, hidden_states_out); + func verify_qK(tensor causal_mask_full, tensor causal_mask_sliding, tensor cos_f, tensor cos_s, tensor hidden_states, tensor kv13_k, tensor kv13_v, tensor kv14_k, tensor kv14_v, tensor per_layer_combined, tensor sin_f, tensor sin_s) { + tensor layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(2621568))))[name = string("layers_0_self_attn_q_proj_weight_palettized")]; + tensor layers_0_self_attn_q_norm_weight = const()[name = string("layers_0_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2623680)))]; + tensor layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2624256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15731520))))[name = string("layers_0_mlp_gate_proj_weight_palettized")]; + tensor layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15741824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28849088))))[name = string("layers_0_mlp_up_proj_weight_palettized")]; + tensor layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28859392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41966656))))[name = string("layers_0_mlp_down_proj_weight_palettized")]; + tensor layers_0_post_feedforward_layernorm_weight = const()[name = string("layers_0_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41969280)))]; + tensor layers_0_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41974464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302208))))[name = string("layers_0_per_layer_input_gate_weight_palettized")]; + tensor layers_1_self_attn_q_proj_weight_palettized = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42302528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44924032))))[name = string("layers_1_self_attn_q_proj_weight_palettized")]; + tensor layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44926144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58033408))))[name = string("layers_1_mlp_gate_proj_weight_palettized")]; + tensor layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58043712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71150976))))[name = string("layers_1_mlp_up_proj_weight_palettized")]; + tensor layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71161280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84268544))))[name = string("layers_1_mlp_down_proj_weight_palettized")]; + tensor layers_1_post_feedforward_layernorm_weight = const()[name = string("layers_1_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84271168)))]; + tensor layers_1_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84276352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84604096))))[name = string("layers_1_per_layer_input_gate_weight_palettized")]; + tensor layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(84604416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89847360))))[name = string("layers_2_self_attn_q_proj_weight_palettized")]; + tensor layers_2_self_attn_q_norm_weight = const()[name = string("layers_2_self_attn_q_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89851520)))]; + tensor layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89852608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102959872))))[name = string("layers_2_mlp_gate_proj_weight_palettized")]; + tensor layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102970176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116077440))))[name = string("layers_2_mlp_up_proj_weight_palettized")]; + tensor layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116087744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129195008))))[name = string("layers_2_mlp_down_proj_weight_palettized")]; + tensor layers_2_post_feedforward_layernorm_weight = const()[name = string("layers_2_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129197632)))]; + tensor layers_2_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129202816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129530560))))[name = 
string("layers_2_per_layer_input_gate_weight_palettized")]; + tensor layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129530880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132152384))))[name = string("layers_3_self_attn_q_proj_weight_palettized")]; + tensor layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132154496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145261760))))[name = string("layers_3_mlp_gate_proj_weight_palettized")]; + tensor layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145272064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158379328))))[name = string("layers_3_mlp_up_proj_weight_palettized")]; + tensor layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158389632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171496896))))[name = string("layers_3_mlp_down_proj_weight_palettized")]; + tensor layers_3_post_feedforward_layernorm_weight = const()[name = string("layers_3_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171499520)))]; + tensor layers_3_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171504704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171832448))))[name = 
string("layers_3_per_layer_input_gate_weight_palettized")]; + tensor layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171832768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174454272))))[name = string("layers_4_self_attn_q_proj_weight_palettized")]; + tensor layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174456384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187563648))))[name = string("layers_4_mlp_gate_proj_weight_palettized")]; + tensor layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187573952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200681216))))[name = string("layers_4_mlp_up_proj_weight_palettized")]; + tensor layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200691520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213798784))))[name = string("layers_4_mlp_down_proj_weight_palettized")]; + tensor layers_4_post_feedforward_layernorm_weight = const()[name = string("layers_4_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213801408)))]; + tensor layers_4_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213806592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214134336))))[name = 
string("layers_4_per_layer_input_gate_weight_palettized")]; + tensor layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214134656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216756160))))[name = string("layers_5_self_attn_q_proj_weight_palettized")]; + tensor layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216758272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229865536))))[name = string("layers_5_mlp_gate_proj_weight_palettized")]; + tensor layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229875840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242983104))))[name = string("layers_5_mlp_up_proj_weight_palettized")]; + tensor layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242993408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256100672))))[name = string("layers_5_mlp_down_proj_weight_palettized")]; + tensor layers_5_post_feedforward_layernorm_weight = const()[name = string("layers_5_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256103296)))]; + tensor layers_5_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256108480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436224))))[name = 
string("layers_5_per_layer_input_gate_weight_palettized")]; + tensor layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256436544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259058048))))[name = string("layers_6_self_attn_q_proj_weight_palettized")]; + tensor layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259060160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272167424))))[name = string("layers_6_mlp_gate_proj_weight_palettized")]; + tensor layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272177728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285284992))))[name = string("layers_6_mlp_up_proj_weight_palettized")]; + tensor layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298402560))))[name = string("layers_6_mlp_down_proj_weight_palettized")]; + tensor layers_6_post_feedforward_layernorm_weight = const()[name = string("layers_6_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298405184)))]; + tensor layers_6_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738112))))[name = 
string("layers_6_per_layer_input_gate_weight_palettized")]; + tensor layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298738432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301359936))))[name = string("layers_7_self_attn_q_proj_weight_palettized")]; + tensor layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314469312))))[name = string("layers_7_mlp_gate_proj_weight_palettized")]; + tensor layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314479616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327586880))))[name = string("layers_7_mlp_up_proj_weight_palettized")]; + tensor layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327597184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340704448))))[name = string("layers_7_mlp_down_proj_weight_palettized")]; + tensor layers_7_post_feedforward_layernorm_weight = const()[name = string("layers_7_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340707072)))]; + tensor layers_7_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(340712256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040000))))[name = 
string("layers_7_per_layer_input_gate_weight_palettized")]; + tensor layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341040320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346283264))))[name = string("layers_8_self_attn_q_proj_weight_palettized")]; + tensor layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346287424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359394688))))[name = string("layers_8_mlp_gate_proj_weight_palettized")]; + tensor layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359404992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372512256))))[name = string("layers_8_mlp_up_proj_weight_palettized")]; + tensor layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372522560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385629824))))[name = string("layers_8_mlp_down_proj_weight_palettized")]; + tensor layers_8_post_feedforward_layernorm_weight = const()[name = string("layers_8_post_feedforward_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385632448)))]; + tensor layers_8_per_layer_input_gate_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385637632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385965376))))[name = 
string("layers_8_per_layer_input_gate_weight_palettized")]; + int32 var_451 = const()[name = string("op_451"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_453_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_453_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_451, interleave = input_1_interleave_0, values = (hidden_states, var_453_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_448_to_fp16 = const()[name = string("op_448_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_448_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor var_458_split_sizes_0 = const()[name = string("op_458_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_458_axis_0 = const()[name = string("op_458_axis_0"), val = int32(-1)]; + tensor var_458_cast_fp16_0, tensor var_458_cast_fp16_1 = split(axis = var_458_axis_0, split_sizes = var_458_split_sizes_0, x = normed_1_cast_fp16)[name = string("op_458_cast_fp16")]; + tensor layers_0_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385965696)))]; + tensor h_1_cast_fp16 = mul(x = var_458_cast_fp16_0, y = layers_0_input_layernorm_weight_promoted_to_fp16)[name = string("h_1_cast_fp16")]; + tensor var_464 = const()[name = string("op_464"), val = tensor([0, 2, 1])]; + tensor var_467_axes_0 = const()[name = string("op_467_axes_0"), val = tensor([2])]; + tensor var_465_cast_fp16 = transpose(perm = var_464, x = h_1_cast_fp16)[name = string("transpose_112")]; + tensor 
var_467_cast_fp16 = expand_dims(axes = var_467_axes_0, x = var_465_cast_fp16)[name = string("op_467_cast_fp16")]; + string q_1_pad_type_0 = const()[name = string("q_1_pad_type_0"), val = string("valid")]; + tensor q_1_strides_0 = const()[name = string("q_1_strides_0"), val = tensor([1, 1])]; + tensor q_1_pad_0 = const()[name = string("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_1_dilations_0 = const()[name = string("q_1_dilations_0"), val = tensor([1, 1])]; + int32 q_1_groups_0 = const()[name = string("q_1_groups_0"), val = int32(1)]; + tensor q_1 = conv(dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = layers_0_self_attn_q_proj_weight_palettized, x = var_467_cast_fp16)[name = string("q_1")]; + tensor var_488 = const()[name = string("op_488"), val = tensor([1, 8, 256, 3])]; + tensor var_489 = reshape(shape = var_488, x = q_1)[name = string("op_489")]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_512 = const()[name = string("op_512"), val = tensor([3, 8, 256])]; + tensor transpose_36 = transpose(perm = transpose_36_perm_0, x = var_489)[name = string("transpose_111")]; + tensor x_1 = reshape(shape = var_512, x = transpose_36)[name = string("x_1")]; + int32 var_518 = const()[name = string("op_518"), val = int32(-1)]; + fp16 const_1_promoted = const()[name = string("const_1_promoted"), val = fp16(-0x1p+0)]; + tensor var_520 = mul(x = x_1, y = const_1_promoted)[name = string("op_520")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_518, interleave = input_5_interleave_0, values = (x_1, var_520))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_515_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor var_525_split_sizes_0 = const()[name = string("op_525_split_sizes_0"), val = tensor([256, 256])]; + int32 var_525_axis_0 = const()[name = string("op_525_axis_0"), val = int32(-1)]; + tensor var_525_0, tensor var_525_1 = split(axis = var_525_axis_0, split_sizes = var_525_split_sizes_0, x = normed_5_cast_fp16)[name = string("op_525")]; + tensor q_5 = mul(x = var_525_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_5")]; + tensor var_532 = const()[name = string("op_532"), val = tensor([1, 3, 8, 256])]; + tensor var_533 = reshape(shape = var_532, x = q_5)[name = string("op_533")]; + tensor var_538 = const()[name = string("op_538"), val = tensor([0, 2, 1, 3])]; + tensor q_7 = transpose(perm = var_538, x = var_533)[name = string("transpose_110")]; + tensor var_540_cast_fp16 = mul(x = q_7, y = cos_s)[name = string("op_540_cast_fp16")]; + tensor var_541_split_sizes_0 = const()[name = string("op_541_split_sizes_0"), val = tensor([128, 128])]; + int32 var_541_axis_0 = const()[name = string("op_541_axis_0"), val = int32(-1)]; + tensor var_541_0, tensor var_541_1 = split(axis = var_541_axis_0, split_sizes = var_541_split_sizes_0, x = q_7)[name = string("op_541")]; + fp16 const_2_promoted = const()[name = string("const_2_promoted"), val = fp16(-0x1p+0)]; + tensor var_543 = mul(x = var_541_1, y = const_2_promoted)[name = string("op_543")]; + int32 var_545 = const()[name = string("op_545"), val = int32(-1)]; + bool var_546_interleave_0 = const()[name = string("op_546_interleave_0"), val = bool(false)]; + tensor var_546 = concat(axis = var_545, interleave = var_546_interleave_0, values = (var_543, var_541_0))[name = string("op_546")]; + tensor var_547_cast_fp16 = mul(x = var_546, y = sin_s)[name = string("op_547_cast_fp16")]; + tensor q_9_cast_fp16 = add(x = var_540_cast_fp16, y = var_547_cast_fp16)[name = string("q_9_cast_fp16")]; 
+ tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_0_reps_0 = const()[name = string("tile_0_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = kv13_k)[name = string("transpose_109")]; + tensor tile_0_cast_fp16 = tile(reps = tile_0_reps_0, x = transpose_0_cast_fp16)[name = string("tile_0_cast_fp16")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_0, x = tile_0_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([-1, 1, 512, 256])]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = reshape_0_cast_fp16)[name = string("transpose_108")]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_1, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_2_perm_0 = const()[name = string("transpose_2_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_1_reps_0 = const()[name = string("tile_1_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = kv13_v)[name = string("transpose_107")]; + tensor tile_1_cast_fp16 = tile(reps = tile_1_reps_0, x = transpose_2_cast_fp16)[name = string("tile_1_cast_fp16")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([4, 2, 1, 512, 256])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_2, x = tile_1_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor transpose_3_perm_0 = const()[name = string("transpose_3_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_3 = const()[name = string("concat_3"), 
val = tensor([-1, 1, 512, 256])]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_106")]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_3, x = transpose_3_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor V_expanded_1_perm_0 = const()[name = string("V_expanded_1_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(false)]; + bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; + tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = reshape_1_cast_fp16)[name = string("transpose_105")]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = q_9_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_sliding)[name = string("x_3_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_3_cast_fp16)[name = string("reduce_max_0")]; + tensor var_579 = sub(x = x_3_cast_fp16, y = reduce_max_0)[name = string("op_579")]; + tensor var_585 = exp(x = var_579)[name = string("op_585")]; + tensor var_595_axes_0 = const()[name = string("op_595_axes_0"), val = tensor([-1])]; + bool var_595_keep_dims_0 = const()[name = string("op_595_keep_dims_0"), val = bool(true)]; + tensor var_595 = reduce_sum(axes = var_595_axes_0, keep_dims = var_595_keep_dims_0, x = var_585)[name = string("op_595")]; + tensor var_601_cast_fp16 = real_div(x = var_585, y = var_595)[name = string("op_601_cast_fp16")]; + 
bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor V_expanded_1_cast_fp16 = transpose(perm = V_expanded_1_perm_0, x = reshape_3_cast_fp16)[name = string("transpose_104")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_601_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_612 = const()[name = string("op_612"), val = tensor([0, 2, 1, 3])]; + tensor var_619 = const()[name = string("op_619"), val = tensor([1, 3, -1])]; + tensor var_613_cast_fp16 = transpose(perm = var_612, x = attn_output_1_cast_fp16)[name = string("transpose_103")]; + tensor attn_output_3_cast_fp16 = reshape(shape = var_619, x = var_613_cast_fp16)[name = string("attn_output_3_cast_fp16")]; + tensor var_624 = const()[name = string("op_624"), val = tensor([0, 2, 1])]; + string var_640_pad_type_0 = const()[name = string("op_640_pad_type_0"), val = string("valid")]; + int32 var_640_groups_0 = const()[name = string("op_640_groups_0"), val = int32(1)]; + tensor var_640_strides_0 = const()[name = string("op_640_strides_0"), val = tensor([1])]; + tensor var_640_pad_0 = const()[name = string("op_640_pad_0"), val = tensor([0, 0])]; + tensor var_640_dilations_0 = const()[name = string("op_640_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385970880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388592384))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_625_cast_fp16 = transpose(perm = var_624, x = attn_output_3_cast_fp16)[name = string("transpose_102")]; + tensor 
var_640_cast_fp16 = conv(dilations = var_640_dilations_0, groups = var_640_groups_0, pad = var_640_pad_0, pad_type = var_640_pad_type_0, strides = var_640_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_625_cast_fp16)[name = string("op_640_cast_fp16")]; + tensor var_644 = const()[name = string("op_644"), val = tensor([0, 2, 1])]; + int32 var_650 = const()[name = string("op_650"), val = int32(-1)]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_7_cast_fp16 = transpose(perm = var_644, x = var_640_cast_fp16)[name = string("transpose_101")]; + tensor var_652_cast_fp16 = mul(x = x_7_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_652_cast_fp16")]; + bool input_9_interleave_0 = const()[name = string("input_9_interleave_0"), val = bool(false)]; + tensor input_9_cast_fp16 = concat(axis = var_650, interleave = input_9_interleave_0, values = (x_7_cast_fp16, var_652_cast_fp16))[name = string("input_9_cast_fp16")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_647_to_fp16 = const()[name = string("op_647_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_647_to_fp16, x = input_9_cast_fp16)[name = string("normed_9_cast_fp16")]; + tensor var_657_split_sizes_0 = const()[name = string("op_657_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_657_axis_0 = const()[name = string("op_657_axis_0"), val = int32(-1)]; + tensor var_657_cast_fp16_0, tensor var_657_cast_fp16_1 = split(axis = var_657_axis_0, split_sizes = var_657_split_sizes_0, x = normed_9_cast_fp16)[name = string("op_657_cast_fp16")]; + tensor layers_0_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388595008)))]; + 
tensor attn_output_5_cast_fp16 = mul(x = var_657_cast_fp16_0, y = layers_0_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = hidden_states, y = attn_output_5_cast_fp16)[name = string("x_9_cast_fp16")]; + int32 var_666 = const()[name = string("op_666"), val = int32(-1)]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_668_cast_fp16 = mul(x = x_9_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_668_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_666, interleave = input_11_interleave_0, values = (x_9_cast_fp16, var_668_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_663_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor var_673_split_sizes_0 = const()[name = string("op_673_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_673_axis_0 = const()[name = string("op_673_axis_0"), val = int32(-1)]; + tensor var_673_cast_fp16_0, tensor var_673_cast_fp16_1 = split(axis = var_673_axis_0, split_sizes = var_673_split_sizes_0, x = normed_13_cast_fp16)[name = string("op_673_cast_fp16")]; + tensor layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388600192)))]; + tensor h_3_cast_fp16 = mul(x = var_673_cast_fp16_0, y = layers_0_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_3_cast_fp16")]; + tensor var_684 = const()[name 
= string("op_684"), val = tensor([0, 2, 1])]; + tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_685 = transpose(perm = var_684, x = h_3_cast_fp16)[name = string("transpose_100")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_685)[name = string("input_13")]; + string gate_1_pad_type_0 = const()[name = string("gate_1_pad_type_0"), val = string("valid")]; + tensor gate_1_strides_0 = const()[name = string("gate_1_strides_0"), val = tensor([1, 1])]; + tensor gate_1_pad_0 = const()[name = string("gate_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_1_dilations_0 = const()[name = string("gate_1_dilations_0"), val = tensor([1, 1])]; + int32 gate_1_groups_0 = const()[name = string("gate_1_groups_0"), val = int32(1)]; + tensor gate_1 = conv(dilations = gate_1_dilations_0, groups = gate_1_groups_0, pad = gate_1_pad_0, pad_type = gate_1_pad_type_0, strides = gate_1_strides_0, weight = layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("gate_1")]; + string up_1_pad_type_0 = const()[name = string("up_1_pad_type_0"), val = string("valid")]; + tensor up_1_strides_0 = const()[name = string("up_1_strides_0"), val = tensor([1, 1])]; + tensor up_1_pad_0 = const()[name = string("up_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_1_dilations_0 = const()[name = string("up_1_dilations_0"), val = tensor([1, 1])]; + int32 up_1_groups_0 = const()[name = string("up_1_groups_0"), val = int32(1)]; + tensor up_1 = conv(dilations = up_1_dilations_0, groups = up_1_groups_0, pad = up_1_pad_0, pad_type = up_1_pad_type_0, strides = up_1_strides_0, weight = layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("up_1")]; + string gate_3_mode_0 = const()[name = string("gate_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_3 = gelu(mode = gate_3_mode_0, x = gate_1)[name = string("gate_3")]; + tensor input_15 = mul(x = gate_3, y = up_1)[name = string("input_15")]; + string 
mlp_out_1_pad_type_0 = const()[name = string("mlp_out_1_pad_type_0"), val = string("valid")]; + tensor mlp_out_1_strides_0 = const()[name = string("mlp_out_1_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_1_pad_0 = const()[name = string("mlp_out_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_1_dilations_0 = const()[name = string("mlp_out_1_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_1_groups_0 = const()[name = string("mlp_out_1_groups_0"), val = int32(1)]; + tensor mlp_out_1 = conv(dilations = mlp_out_1_dilations_0, groups = mlp_out_1_groups_0, pad = mlp_out_1_pad_0, pad_type = mlp_out_1_pad_type_0, strides = mlp_out_1_strides_0, weight = layers_0_mlp_down_proj_weight_palettized, x = input_15)[name = string("mlp_out_1")]; + tensor var_725_axes_0 = const()[name = string("op_725_axes_0"), val = tensor([2])]; + tensor var_725 = squeeze(axes = var_725_axes_0, x = mlp_out_1)[name = string("op_725")]; + tensor var_729 = const()[name = string("op_729"), val = tensor([0, 2, 1])]; + int32 var_735 = const()[name = string("op_735"), val = int32(-1)]; + fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; + tensor x_11 = transpose(perm = var_729, x = var_725)[name = string("transpose_99")]; + tensor var_737 = mul(x = x_11, y = const_5_promoted)[name = string("op_737")]; + bool input_17_interleave_0 = const()[name = string("input_17_interleave_0"), val = bool(false)]; + tensor input_17 = concat(axis = var_735, interleave = input_17_interleave_0, values = (x_11, var_737))[name = string("input_17")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_732_to_fp16, x = input_17)[name = string("normed_17_cast_fp16")]; + tensor var_742_split_sizes_0 = const()[name = string("op_742_split_sizes_0"), val = 
tensor([2560, 2560])]; + int32 var_742_axis_0 = const()[name = string("op_742_axis_0"), val = int32(-1)]; + tensor var_742_0, tensor var_742_1 = split(axis = var_742_axis_0, split_sizes = var_742_split_sizes_0, x = normed_17_cast_fp16)[name = string("op_742")]; + tensor hidden_states_3 = mul(x = var_742_0, y = layers_0_post_feedforward_layernorm_weight)[name = string("hidden_states_3")]; + tensor hidden_states_5_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_3)[name = string("hidden_states_5_cast_fp16")]; + tensor per_layer_slice_1_begin_0 = const()[name = string("per_layer_slice_1_begin_0"), val = tensor([0, 0, 8448])]; + tensor per_layer_slice_1_end_0 = const()[name = string("per_layer_slice_1_end_0"), val = tensor([1, 3, 8704])]; + tensor per_layer_slice_1_end_mask_0 = const()[name = string("per_layer_slice_1_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_1_cast_fp16 = slice_by_index(begin = per_layer_slice_1_begin_0, end = per_layer_slice_1_end_0, end_mask = per_layer_slice_1_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_1_cast_fp16")]; + tensor var_770 = const()[name = string("op_770"), val = tensor([0, 2, 1])]; + tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([2])]; + tensor var_771 = transpose(perm = var_770, x = hidden_states_5_cast_fp16)[name = string("transpose_98")]; + tensor input_19 = expand_dims(axes = input_19_axes_0, x = var_771)[name = string("input_19")]; + string gated_1_pad_type_0 = const()[name = string("gated_1_pad_type_0"), val = string("valid")]; + tensor gated_1_strides_0 = const()[name = string("gated_1_strides_0"), val = tensor([1, 1])]; + tensor gated_1_pad_0 = const()[name = string("gated_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_1_dilations_0 = const()[name = string("gated_1_dilations_0"), val = tensor([1, 1])]; + int32 gated_1_groups_0 = const()[name = string("gated_1_groups_0"), val = int32(1)]; + tensor gated_1 = conv(dilations = 
gated_1_dilations_0, groups = gated_1_groups_0, pad = gated_1_pad_0, pad_type = gated_1_pad_type_0, strides = gated_1_strides_0, weight = layers_0_per_layer_input_gate_weight_palettized, x = input_19)[name = string("gated_1")]; + string gated_3_mode_0 = const()[name = string("gated_3_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_3 = gelu(mode = gated_3_mode_0, x = gated_1)[name = string("gated_3")]; + tensor var_790 = const()[name = string("op_790"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_1_axes_0 = const()[name = string("per_layer_slice_conv_1_axes_0"), val = tensor([2])]; + tensor var_791_cast_fp16 = transpose(perm = var_790, x = per_layer_slice_1_cast_fp16)[name = string("transpose_97")]; + tensor per_layer_slice_conv_1_cast_fp16 = expand_dims(axes = per_layer_slice_conv_1_axes_0, x = var_791_cast_fp16)[name = string("per_layer_slice_conv_1_cast_fp16")]; + tensor input_21_cast_fp16 = mul(x = gated_3, y = per_layer_slice_conv_1_cast_fp16)[name = string("input_21_cast_fp16")]; + string gated_5_pad_type_0 = const()[name = string("gated_5_pad_type_0"), val = string("valid")]; + tensor gated_5_strides_0 = const()[name = string("gated_5_strides_0"), val = tensor([1, 1])]; + tensor gated_5_pad_0 = const()[name = string("gated_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_5_dilations_0 = const()[name = string("gated_5_dilations_0"), val = tensor([1, 1])]; + int32 gated_5_groups_0 = const()[name = string("gated_5_groups_0"), val = int32(1)]; + tensor layers_0_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388605376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388933120))))[name = string("layers_0_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_5_cast_fp16 = conv(dilations = gated_5_dilations_0, groups = gated_5_groups_0, pad = 
gated_5_pad_0, pad_type = gated_5_pad_type_0, strides = gated_5_strides_0, weight = layers_0_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_21_cast_fp16)[name = string("gated_5_cast_fp16")]; + tensor var_807_axes_0 = const()[name = string("op_807_axes_0"), val = tensor([2])]; + tensor var_807_cast_fp16 = squeeze(axes = var_807_axes_0, x = gated_5_cast_fp16)[name = string("op_807_cast_fp16")]; + tensor var_811 = const()[name = string("op_811"), val = tensor([0, 2, 1])]; + int32 var_817 = const()[name = string("op_817"), val = int32(-1)]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_13_cast_fp16 = transpose(perm = var_811, x = var_807_cast_fp16)[name = string("transpose_96")]; + tensor var_819_cast_fp16 = mul(x = x_13_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_819_cast_fp16")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23_cast_fp16 = concat(axis = var_817, interleave = input_23_interleave_0, values = (x_13_cast_fp16, var_819_cast_fp16))[name = string("input_23_cast_fp16")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_814_to_fp16, x = input_23_cast_fp16)[name = string("normed_21_cast_fp16")]; + tensor var_824_split_sizes_0 = const()[name = string("op_824_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_824_axis_0 = const()[name = string("op_824_axis_0"), val = int32(-1)]; + tensor var_824_cast_fp16_0, tensor var_824_cast_fp16_1 = split(axis = var_824_axis_0, split_sizes = var_824_split_sizes_0, x = normed_21_cast_fp16)[name = string("op_824_cast_fp16")]; + tensor layers_0_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_0_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388935744)))]; + tensor hidden_states_9_cast_fp16 = mul(x = var_824_cast_fp16_0, y = layers_0_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_9_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = tensor([0x1.a6p-1])]; + tensor x_15_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_7_promoted_to_fp16)[name = string("x_15_cast_fp16")]; + int32 var_839 = const()[name = string("op_839"), val = int32(-1)]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_841_cast_fp16 = mul(x = x_15_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_841_cast_fp16")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25_cast_fp16 = concat(axis = var_839, interleave = input_25_interleave_0, values = (x_15_cast_fp16, var_841_cast_fp16))[name = string("input_25_cast_fp16")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_836_to_fp16 = const()[name = string("op_836_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_836_to_fp16, x = input_25_cast_fp16)[name = string("normed_25_cast_fp16")]; + tensor var_846_split_sizes_0 = const()[name = string("op_846_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_846_axis_0 = const()[name = string("op_846_axis_0"), val = int32(-1)]; + tensor var_846_cast_fp16_0, tensor var_846_cast_fp16_1 = split(axis = var_846_axis_0, split_sizes = var_846_split_sizes_0, x = normed_25_cast_fp16)[name = 
string("op_846_cast_fp16")]; + tensor layers_1_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388940928)))]; + tensor h_7_cast_fp16 = mul(x = var_846_cast_fp16_0, y = layers_1_input_layernorm_weight_promoted_to_fp16)[name = string("h_7_cast_fp16")]; + tensor var_852 = const()[name = string("op_852"), val = tensor([0, 2, 1])]; + tensor var_855_axes_0 = const()[name = string("op_855_axes_0"), val = tensor([2])]; + tensor var_853_cast_fp16 = transpose(perm = var_852, x = h_7_cast_fp16)[name = string("transpose_95")]; + tensor var_855_cast_fp16 = expand_dims(axes = var_855_axes_0, x = var_853_cast_fp16)[name = string("op_855_cast_fp16")]; + string q_11_pad_type_0 = const()[name = string("q_11_pad_type_0"), val = string("valid")]; + tensor q_11_strides_0 = const()[name = string("q_11_strides_0"), val = tensor([1, 1])]; + tensor q_11_pad_0 = const()[name = string("q_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_11_dilations_0 = const()[name = string("q_11_dilations_0"), val = tensor([1, 1])]; + int32 q_11_groups_0 = const()[name = string("q_11_groups_0"), val = int32(1)]; + tensor q_11 = conv(dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = layers_1_self_attn_q_proj_weight_palettized, x = var_855_cast_fp16)[name = string("q_11")]; + tensor var_876 = const()[name = string("op_876"), val = tensor([1, 8, 256, 3])]; + tensor var_877 = reshape(shape = var_876, x = q_11)[name = string("op_877")]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_900 = const()[name = string("op_900"), val = tensor([3, 8, 256])]; + tensor transpose_38 = transpose(perm = transpose_38_perm_0, x = var_877)[name = string("transpose_94")]; + tensor x_17 = reshape(shape = var_900, x = 
transpose_38)[name = string("x_17")]; + int32 var_906 = const()[name = string("op_906"), val = int32(-1)]; + fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; + tensor var_908 = mul(x = x_17, y = const_9_promoted)[name = string("op_908")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29 = concat(axis = var_906, interleave = input_29_interleave_0, values = (x_17, var_908))[name = string("input_29")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_903_to_fp16 = const()[name = string("op_903_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_903_to_fp16, x = input_29)[name = string("normed_29_cast_fp16")]; + tensor var_913_split_sizes_0 = const()[name = string("op_913_split_sizes_0"), val = tensor([256, 256])]; + int32 var_913_axis_0 = const()[name = string("op_913_axis_0"), val = int32(-1)]; + tensor var_913_0, tensor var_913_1 = split(axis = var_913_axis_0, split_sizes = var_913_split_sizes_0, x = normed_29_cast_fp16)[name = string("op_913")]; + tensor q_15 = mul(x = var_913_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_15")]; + tensor var_920 = const()[name = string("op_920"), val = tensor([1, 3, 8, 256])]; + tensor var_921 = reshape(shape = var_920, x = q_15)[name = string("op_921")]; + tensor var_926 = const()[name = string("op_926"), val = tensor([0, 2, 1, 3])]; + tensor q_17 = transpose(perm = var_926, x = var_921)[name = string("transpose_93")]; + tensor var_928_cast_fp16 = mul(x = q_17, y = cos_s)[name = string("op_928_cast_fp16")]; + tensor var_929_split_sizes_0 = const()[name = string("op_929_split_sizes_0"), val = tensor([128, 128])]; + int32 var_929_axis_0 = const()[name = string("op_929_axis_0"), val = int32(-1)]; + tensor var_929_0, tensor var_929_1 = split(axis = var_929_axis_0, split_sizes = var_929_split_sizes_0, 
x = q_17)[name = string("op_929")]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor var_931 = mul(x = var_929_1, y = const_10_promoted)[name = string("op_931")]; + int32 var_933 = const()[name = string("op_933"), val = int32(-1)]; + bool var_934_interleave_0 = const()[name = string("op_934_interleave_0"), val = bool(false)]; + tensor var_934 = concat(axis = var_933, interleave = var_934_interleave_0, values = (var_931, var_929_0))[name = string("op_934")]; + tensor var_935_cast_fp16 = mul(x = var_934, y = sin_s)[name = string("op_935_cast_fp16")]; + tensor q_19_cast_fp16 = add(x = var_928_cast_fp16, y = var_935_cast_fp16)[name = string("q_19_cast_fp16")]; + bool attn_weights_5_transpose_x_0 = const()[name = string("attn_weights_5_transpose_x_0"), val = bool(false)]; + bool attn_weights_5_transpose_y_0 = const()[name = string("attn_weights_5_transpose_y_0"), val = bool(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = q_19_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask_sliding)[name = string("x_19_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_19_cast_fp16)[name = string("reduce_max_1")]; + tensor var_967 = sub(x = x_19_cast_fp16, y = reduce_max_1)[name = string("op_967")]; + tensor var_973 = exp(x = var_967)[name = string("op_973")]; + tensor var_983_axes_0 = const()[name = string("op_983_axes_0"), val = tensor([-1])]; + bool var_983_keep_dims_0 = const()[name = string("op_983_keep_dims_0"), val = bool(true)]; + tensor var_983 = 
reduce_sum(axes = var_983_axes_0, keep_dims = var_983_keep_dims_0, x = var_973)[name = string("op_983")]; + tensor var_989_cast_fp16 = real_div(x = var_973, y = var_983)[name = string("op_989_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = var_989_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_1000 = const()[name = string("op_1000"), val = tensor([0, 2, 1, 3])]; + tensor var_1007 = const()[name = string("op_1007"), val = tensor([1, 3, -1])]; + tensor var_1001_cast_fp16 = transpose(perm = var_1000, x = attn_output_7_cast_fp16)[name = string("transpose_92")]; + tensor attn_output_9_cast_fp16 = reshape(shape = var_1007, x = var_1001_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_1012 = const()[name = string("op_1012"), val = tensor([0, 2, 1])]; + string var_1028_pad_type_0 = const()[name = string("op_1028_pad_type_0"), val = string("valid")]; + int32 var_1028_groups_0 = const()[name = string("op_1028_groups_0"), val = int32(1)]; + tensor var_1028_strides_0 = const()[name = string("op_1028_strides_0"), val = tensor([1])]; + tensor var_1028_pad_0 = const()[name = string("op_1028_pad_0"), val = tensor([0, 0])]; + tensor var_1028_dilations_0 = const()[name = string("op_1028_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388946112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391567616))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1013_cast_fp16 = 
transpose(perm = var_1012, x = attn_output_9_cast_fp16)[name = string("transpose_91")]; + tensor var_1028_cast_fp16 = conv(dilations = var_1028_dilations_0, groups = var_1028_groups_0, pad = var_1028_pad_0, pad_type = var_1028_pad_type_0, strides = var_1028_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_1013_cast_fp16)[name = string("op_1028_cast_fp16")]; + tensor var_1032 = const()[name = string("op_1032"), val = tensor([0, 2, 1])]; + int32 var_1038 = const()[name = string("op_1038"), val = int32(-1)]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_23_cast_fp16 = transpose(perm = var_1032, x = var_1028_cast_fp16)[name = string("transpose_90")]; + tensor var_1040_cast_fp16 = mul(x = x_23_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_1040_cast_fp16")]; + bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; + tensor input_33_cast_fp16 = concat(axis = var_1038, interleave = input_33_interleave_0, values = (x_23_cast_fp16, var_1040_cast_fp16))[name = string("input_33_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_1035_to_fp16 = const()[name = string("op_1035_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_1035_to_fp16, x = input_33_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor var_1045_split_sizes_0 = const()[name = string("op_1045_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1045_axis_0 = const()[name = string("op_1045_axis_0"), val = int32(-1)]; + tensor var_1045_cast_fp16_0, tensor var_1045_cast_fp16_1 = split(axis = var_1045_axis_0, split_sizes = var_1045_split_sizes_0, x = normed_33_cast_fp16)[name = string("op_1045_cast_fp16")]; + tensor layers_1_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = 
string("layers_1_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391570240)))]; + tensor attn_output_11_cast_fp16 = mul(x = var_1045_cast_fp16_0, y = layers_1_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_15_cast_fp16, y = attn_output_11_cast_fp16)[name = string("x_25_cast_fp16")]; + int32 var_1054 = const()[name = string("op_1054"), val = int32(-1)]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1056_cast_fp16 = mul(x = x_25_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1056_cast_fp16")]; + bool input_35_interleave_0 = const()[name = string("input_35_interleave_0"), val = bool(false)]; + tensor input_35_cast_fp16 = concat(axis = var_1054, interleave = input_35_interleave_0, values = (x_25_cast_fp16, var_1056_cast_fp16))[name = string("input_35_cast_fp16")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_1051_to_fp16 = const()[name = string("op_1051_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_1051_to_fp16, x = input_35_cast_fp16)[name = string("normed_37_cast_fp16")]; + tensor var_1061_split_sizes_0 = const()[name = string("op_1061_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1061_axis_0 = const()[name = string("op_1061_axis_0"), val = int32(-1)]; + tensor var_1061_cast_fp16_0, tensor var_1061_cast_fp16_1 = split(axis = var_1061_axis_0, split_sizes = var_1061_split_sizes_0, x = normed_37_cast_fp16)[name = string("op_1061_cast_fp16")]; + tensor layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(391575424)))]; + tensor h_9_cast_fp16 = mul(x = var_1061_cast_fp16_0, y = layers_1_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_9_cast_fp16")]; + tensor var_1072 = const()[name = string("op_1072"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_1073 = transpose(perm = var_1072, x = h_9_cast_fp16)[name = string("transpose_89")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_1073)[name = string("input_37")]; + string gate_5_pad_type_0 = const()[name = string("gate_5_pad_type_0"), val = string("valid")]; + tensor gate_5_strides_0 = const()[name = string("gate_5_strides_0"), val = tensor([1, 1])]; + tensor gate_5_pad_0 = const()[name = string("gate_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_5_dilations_0 = const()[name = string("gate_5_dilations_0"), val = tensor([1, 1])]; + int32 gate_5_groups_0 = const()[name = string("gate_5_groups_0"), val = int32(1)]; + tensor gate_5 = conv(dilations = gate_5_dilations_0, groups = gate_5_groups_0, pad = gate_5_pad_0, pad_type = gate_5_pad_type_0, strides = gate_5_strides_0, weight = layers_1_mlp_gate_proj_weight_palettized, x = input_37)[name = string("gate_5")]; + string up_3_pad_type_0 = const()[name = string("up_3_pad_type_0"), val = string("valid")]; + tensor up_3_strides_0 = const()[name = string("up_3_strides_0"), val = tensor([1, 1])]; + tensor up_3_pad_0 = const()[name = string("up_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_3_dilations_0 = const()[name = string("up_3_dilations_0"), val = tensor([1, 1])]; + int32 up_3_groups_0 = const()[name = string("up_3_groups_0"), val = int32(1)]; + tensor up_3 = conv(dilations = up_3_dilations_0, groups = up_3_groups_0, pad = up_3_pad_0, pad_type = up_3_pad_type_0, strides = up_3_strides_0, weight = layers_1_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_3")]; + string gate_7_mode_0 = const()[name = string("gate_7_mode_0"), 
val = string("TANH_APPROXIMATION")]; + tensor gate_7 = gelu(mode = gate_7_mode_0, x = gate_5)[name = string("gate_7")]; + tensor input_39 = mul(x = gate_7, y = up_3)[name = string("input_39")]; + string mlp_out_3_pad_type_0 = const()[name = string("mlp_out_3_pad_type_0"), val = string("valid")]; + tensor mlp_out_3_strides_0 = const()[name = string("mlp_out_3_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_3_pad_0 = const()[name = string("mlp_out_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_3_dilations_0 = const()[name = string("mlp_out_3_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_3_groups_0 = const()[name = string("mlp_out_3_groups_0"), val = int32(1)]; + tensor mlp_out_3 = conv(dilations = mlp_out_3_dilations_0, groups = mlp_out_3_groups_0, pad = mlp_out_3_pad_0, pad_type = mlp_out_3_pad_type_0, strides = mlp_out_3_strides_0, weight = layers_1_mlp_down_proj_weight_palettized, x = input_39)[name = string("mlp_out_3")]; + tensor var_1113_axes_0 = const()[name = string("op_1113_axes_0"), val = tensor([2])]; + tensor var_1113 = squeeze(axes = var_1113_axes_0, x = mlp_out_3)[name = string("op_1113")]; + tensor var_1117 = const()[name = string("op_1117"), val = tensor([0, 2, 1])]; + int32 var_1123 = const()[name = string("op_1123"), val = int32(-1)]; + fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; + tensor x_27 = transpose(perm = var_1117, x = var_1113)[name = string("transpose_88")]; + tensor var_1125 = mul(x = x_27, y = const_13_promoted)[name = string("op_1125")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_1123, interleave = input_41_interleave_0, values = (x_27, var_1125))[name = string("input_41")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_1120_to_fp16 = const()[name = string("op_1120_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_1120_to_fp16, x = input_41)[name = string("normed_41_cast_fp16")]; + tensor var_1130_split_sizes_0 = const()[name = string("op_1130_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1130_axis_0 = const()[name = string("op_1130_axis_0"), val = int32(-1)]; + tensor var_1130_0, tensor var_1130_1 = split(axis = var_1130_axis_0, split_sizes = var_1130_split_sizes_0, x = normed_41_cast_fp16)[name = string("op_1130")]; + tensor hidden_states_13 = mul(x = var_1130_0, y = layers_1_post_feedforward_layernorm_weight)[name = string("hidden_states_13")]; + tensor hidden_states_15_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_13)[name = string("hidden_states_15_cast_fp16")]; + tensor per_layer_slice_3_begin_0 = const()[name = string("per_layer_slice_3_begin_0"), val = tensor([0, 0, 8704])]; + tensor per_layer_slice_3_end_0 = const()[name = string("per_layer_slice_3_end_0"), val = tensor([1, 3, 8960])]; + tensor per_layer_slice_3_end_mask_0 = const()[name = string("per_layer_slice_3_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_3_cast_fp16 = slice_by_index(begin = per_layer_slice_3_begin_0, end = per_layer_slice_3_end_0, end_mask = per_layer_slice_3_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_3_cast_fp16")]; + tensor var_1158 = const()[name = string("op_1158"), val = tensor([0, 2, 1])]; + tensor input_43_axes_0 = const()[name = string("input_43_axes_0"), val = tensor([2])]; + tensor var_1159 = transpose(perm = var_1158, x = hidden_states_15_cast_fp16)[name = string("transpose_87")]; + tensor input_43 = expand_dims(axes = input_43_axes_0, x = var_1159)[name = string("input_43")]; + string gated_7_pad_type_0 = const()[name = string("gated_7_pad_type_0"), val = string("valid")]; + tensor gated_7_strides_0 = const()[name = string("gated_7_strides_0"), val = tensor([1, 1])]; + tensor gated_7_pad_0 = const()[name = string("gated_7_pad_0"), val = 
tensor([0, 0, 0, 0])]; + tensor gated_7_dilations_0 = const()[name = string("gated_7_dilations_0"), val = tensor([1, 1])]; + int32 gated_7_groups_0 = const()[name = string("gated_7_groups_0"), val = int32(1)]; + tensor gated_7 = conv(dilations = gated_7_dilations_0, groups = gated_7_groups_0, pad = gated_7_pad_0, pad_type = gated_7_pad_type_0, strides = gated_7_strides_0, weight = layers_1_per_layer_input_gate_weight_palettized, x = input_43)[name = string("gated_7")]; + string gated_9_mode_0 = const()[name = string("gated_9_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_9 = gelu(mode = gated_9_mode_0, x = gated_7)[name = string("gated_9")]; + tensor var_1178 = const()[name = string("op_1178"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_3_axes_0 = const()[name = string("per_layer_slice_conv_3_axes_0"), val = tensor([2])]; + tensor var_1179_cast_fp16 = transpose(perm = var_1178, x = per_layer_slice_3_cast_fp16)[name = string("transpose_86")]; + tensor per_layer_slice_conv_3_cast_fp16 = expand_dims(axes = per_layer_slice_conv_3_axes_0, x = var_1179_cast_fp16)[name = string("per_layer_slice_conv_3_cast_fp16")]; + tensor input_45_cast_fp16 = mul(x = gated_9, y = per_layer_slice_conv_3_cast_fp16)[name = string("input_45_cast_fp16")]; + string gated_11_pad_type_0 = const()[name = string("gated_11_pad_type_0"), val = string("valid")]; + tensor gated_11_strides_0 = const()[name = string("gated_11_strides_0"), val = tensor([1, 1])]; + tensor gated_11_pad_0 = const()[name = string("gated_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_11_dilations_0 = const()[name = string("gated_11_dilations_0"), val = tensor([1, 1])]; + int32 gated_11_groups_0 = const()[name = string("gated_11_groups_0"), val = int32(1)]; + tensor layers_1_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391580608))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(391908352))))[name = string("layers_1_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_11_cast_fp16 = conv(dilations = gated_11_dilations_0, groups = gated_11_groups_0, pad = gated_11_pad_0, pad_type = gated_11_pad_type_0, strides = gated_11_strides_0, weight = layers_1_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_45_cast_fp16)[name = string("gated_11_cast_fp16")]; + tensor var_1195_axes_0 = const()[name = string("op_1195_axes_0"), val = tensor([2])]; + tensor var_1195_cast_fp16 = squeeze(axes = var_1195_axes_0, x = gated_11_cast_fp16)[name = string("op_1195_cast_fp16")]; + tensor var_1199 = const()[name = string("op_1199"), val = tensor([0, 2, 1])]; + int32 var_1205 = const()[name = string("op_1205"), val = int32(-1)]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_29_cast_fp16 = transpose(perm = var_1199, x = var_1195_cast_fp16)[name = string("transpose_85")]; + tensor var_1207_cast_fp16 = mul(x = x_29_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1207_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_1205, interleave = input_47_interleave_0, values = (x_29_cast_fp16, var_1207_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_1202_to_fp16 = const()[name = string("op_1202_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_1202_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor var_1212_split_sizes_0 = const()[name = string("op_1212_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1212_axis_0 = const()[name = string("op_1212_axis_0"), val = int32(-1)]; + tensor 
var_1212_cast_fp16_0, tensor var_1212_cast_fp16_1 = split(axis = var_1212_axis_0, split_sizes = var_1212_split_sizes_0, x = normed_45_cast_fp16)[name = string("op_1212_cast_fp16")]; + tensor layers_1_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_1_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391910976)))]; + tensor hidden_states_19_cast_fp16 = mul(x = var_1212_cast_fp16_0, y = layers_1_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_19_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = tensor([0x1.acp-1])]; + tensor x_31_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_15_promoted_to_fp16)[name = string("x_31_cast_fp16")]; + int32 var_1227 = const()[name = string("op_1227"), val = int32(-1)]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1229_cast_fp16 = mul(x = x_31_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1229_cast_fp16")]; + bool input_49_interleave_0 = const()[name = string("input_49_interleave_0"), val = bool(false)]; + tensor input_49_cast_fp16 = concat(axis = var_1227, interleave = input_49_interleave_0, values = (x_31_cast_fp16, var_1229_cast_fp16))[name = string("input_49_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_1224_to_fp16 = const()[name = string("op_1224_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_1224_to_fp16, x = input_49_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor var_1234_split_sizes_0 = const()[name = 
string("op_1234_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1234_axis_0 = const()[name = string("op_1234_axis_0"), val = int32(-1)]; + tensor var_1234_cast_fp16_0, tensor var_1234_cast_fp16_1 = split(axis = var_1234_axis_0, split_sizes = var_1234_split_sizes_0, x = normed_49_cast_fp16)[name = string("op_1234_cast_fp16")]; + tensor layers_2_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391916160)))]; + tensor h_13_cast_fp16 = mul(x = var_1234_cast_fp16_0, y = layers_2_input_layernorm_weight_promoted_to_fp16)[name = string("h_13_cast_fp16")]; + tensor var_1240 = const()[name = string("op_1240"), val = tensor([0, 2, 1])]; + tensor var_1243_axes_0 = const()[name = string("op_1243_axes_0"), val = tensor([2])]; + tensor var_1241_cast_fp16 = transpose(perm = var_1240, x = h_13_cast_fp16)[name = string("transpose_84")]; + tensor var_1243_cast_fp16 = expand_dims(axes = var_1243_axes_0, x = var_1241_cast_fp16)[name = string("op_1243_cast_fp16")]; + string q_21_pad_type_0 = const()[name = string("q_21_pad_type_0"), val = string("valid")]; + tensor q_21_strides_0 = const()[name = string("q_21_strides_0"), val = tensor([1, 1])]; + tensor q_21_pad_0 = const()[name = string("q_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_21_dilations_0 = const()[name = string("q_21_dilations_0"), val = tensor([1, 1])]; + int32 q_21_groups_0 = const()[name = string("q_21_groups_0"), val = int32(1)]; + tensor q_21 = conv(dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = layers_2_self_attn_q_proj_weight_palettized, x = var_1243_cast_fp16)[name = string("q_21")]; + tensor var_1264 = const()[name = string("op_1264"), val = tensor([1, 8, 512, 3])]; + tensor var_1265 = reshape(shape = var_1264, x = q_21)[name = string("op_1265")]; + tensor 
transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_1288 = const()[name = string("op_1288"), val = tensor([3, 8, 512])]; + tensor transpose_40 = transpose(perm = transpose_40_perm_0, x = var_1265)[name = string("transpose_83")]; + tensor x_33 = reshape(shape = var_1288, x = transpose_40)[name = string("x_33")]; + int32 var_1294 = const()[name = string("op_1294"), val = int32(-1)]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1296 = mul(x = x_33, y = const_17_promoted)[name = string("op_1296")]; + bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; + tensor input_53 = concat(axis = var_1294, interleave = input_53_interleave_0, values = (x_33, var_1296))[name = string("input_53")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_1291_to_fp16 = const()[name = string("op_1291_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_1291_to_fp16, x = input_53)[name = string("normed_53_cast_fp16")]; + tensor var_1301_split_sizes_0 = const()[name = string("op_1301_split_sizes_0"), val = tensor([512, 512])]; + int32 var_1301_axis_0 = const()[name = string("op_1301_axis_0"), val = int32(-1)]; + tensor var_1301_0, tensor var_1301_1 = split(axis = var_1301_axis_0, split_sizes = var_1301_split_sizes_0, x = normed_53_cast_fp16)[name = string("op_1301")]; + tensor q_25 = mul(x = var_1301_0, y = layers_2_self_attn_q_norm_weight)[name = string("q_25")]; + tensor var_1308 = const()[name = string("op_1308"), val = tensor([1, 3, 8, 512])]; + tensor var_1309 = reshape(shape = var_1308, x = q_25)[name = string("op_1309")]; + tensor var_1314 = const()[name = string("op_1314"), val = tensor([0, 2, 1, 3])]; + tensor q_27 = transpose(perm = var_1314, x = var_1309)[name = string("transpose_82")]; + tensor 
var_1316_cast_fp16 = mul(x = q_27, y = cos_f)[name = string("op_1316_cast_fp16")]; + tensor var_1317_split_sizes_0 = const()[name = string("op_1317_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1317_axis_0 = const()[name = string("op_1317_axis_0"), val = int32(-1)]; + tensor var_1317_0, tensor var_1317_1 = split(axis = var_1317_axis_0, split_sizes = var_1317_split_sizes_0, x = q_27)[name = string("op_1317")]; + fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; + tensor var_1319 = mul(x = var_1317_1, y = const_18_promoted)[name = string("op_1319")]; + int32 var_1321 = const()[name = string("op_1321"), val = int32(-1)]; + bool var_1322_interleave_0 = const()[name = string("op_1322_interleave_0"), val = bool(false)]; + tensor var_1322 = concat(axis = var_1321, interleave = var_1322_interleave_0, values = (var_1319, var_1317_0))[name = string("op_1322")]; + tensor var_1323_cast_fp16 = mul(x = var_1322, y = sin_f)[name = string("op_1323_cast_fp16")]; + tensor q_29_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1323_cast_fp16)[name = string("q_29_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = string("transpose_8_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_4_reps_0 = const()[name = string("tile_4_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = kv14_k)[name = string("transpose_81")]; + tensor tile_4_cast_fp16 = tile(reps = tile_4_reps_0, x = transpose_8_cast_fp16)[name = string("tile_4_cast_fp16")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_8, x = tile_4_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_9_cast_fp16 = 
transpose(perm = transpose_9_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_80")]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_9, x = transpose_9_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([1, 0, -1, -2])]; + tensor transpose_10_perm_0 = const()[name = string("transpose_10_perm_0"), val = tensor([1, 0, 2, 3])]; + tensor tile_5_reps_0 = const()[name = string("tile_5_reps_0"), val = tensor([4, 1, 1, 1])]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = kv14_v)[name = string("transpose_79")]; + tensor tile_5_cast_fp16 = tile(reps = tile_5_reps_0, x = transpose_10_cast_fp16)[name = string("tile_5_cast_fp16")]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([4, 2, 1, 2048, 512])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_10, x = tile_5_cast_fp16)[name = string("reshape_10_cast_fp16")]; + tensor transpose_11_perm_0 = const()[name = string("transpose_11_perm_0"), val = tensor([1, 0, 2, 3, 4])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([-1, 1, 2048, 512])]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = reshape_10_cast_fp16)[name = string("transpose_78")]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_11, x = transpose_11_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor V_expanded_5_perm_0 = const()[name = string("V_expanded_5_perm_0"), val = tensor([1, 0, -2, -1])]; + bool attn_weights_9_transpose_x_0 = const()[name = string("attn_weights_9_transpose_x_0"), val = bool(false)]; + bool attn_weights_9_transpose_y_0 = const()[name = string("attn_weights_9_transpose_y_0"), val = bool(false)]; + tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = reshape_9_cast_fp16)[name = string("transpose_77")]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = 
attn_weights_9_transpose_y_0, x = q_29_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_35_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask_full)[name = string("x_35_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_35_cast_fp16)[name = string("reduce_max_2")]; + tensor var_1355 = sub(x = x_35_cast_fp16, y = reduce_max_2)[name = string("op_1355")]; + tensor var_1361 = exp(x = var_1355)[name = string("op_1361")]; + tensor var_1371_axes_0 = const()[name = string("op_1371_axes_0"), val = tensor([-1])]; + bool var_1371_keep_dims_0 = const()[name = string("op_1371_keep_dims_0"), val = bool(true)]; + tensor var_1371 = reduce_sum(axes = var_1371_axes_0, keep_dims = var_1371_keep_dims_0, x = var_1361)[name = string("op_1371")]; + tensor var_1377_cast_fp16 = real_div(x = var_1361, y = var_1371)[name = string("op_1377_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor V_expanded_5_cast_fp16 = transpose(perm = V_expanded_5_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_76")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_1377_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_1388 = const()[name = string("op_1388"), val = tensor([0, 2, 1, 3])]; + tensor var_1395 = const()[name = string("op_1395"), val = tensor([1, 3, -1])]; + tensor var_1389_cast_fp16 = transpose(perm = var_1388, x = 
attn_output_13_cast_fp16)[name = string("transpose_75")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_1395, x = var_1389_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_1400 = const()[name = string("op_1400"), val = tensor([0, 2, 1])]; + string var_1416_pad_type_0 = const()[name = string("op_1416_pad_type_0"), val = string("valid")]; + int32 var_1416_groups_0 = const()[name = string("op_1416_groups_0"), val = int32(1)]; + tensor var_1416_strides_0 = const()[name = string("op_1416_strides_0"), val = tensor([1])]; + tensor var_1416_pad_0 = const()[name = string("op_1416_pad_0"), val = tensor([0, 0])]; + tensor var_1416_dilations_0 = const()[name = string("op_1416_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391921344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397164288))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1401_cast_fp16 = transpose(perm = var_1400, x = attn_output_15_cast_fp16)[name = string("transpose_74")]; + tensor var_1416_cast_fp16 = conv(dilations = var_1416_dilations_0, groups = var_1416_groups_0, pad = var_1416_pad_0, pad_type = var_1416_pad_type_0, strides = var_1416_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_1401_cast_fp16)[name = string("op_1416_cast_fp16")]; + tensor var_1420 = const()[name = string("op_1420"), val = tensor([0, 2, 1])]; + int32 var_1426 = const()[name = string("op_1426"), val = int32(-1)]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_39_cast_fp16 = transpose(perm = var_1420, x = var_1416_cast_fp16)[name = string("transpose_73")]; + tensor var_1428_cast_fp16 = mul(x = x_39_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1428_cast_fp16")]; 
+ bool input_57_interleave_0 = const()[name = string("input_57_interleave_0"), val = bool(false)]; + tensor input_57_cast_fp16 = concat(axis = var_1426, interleave = input_57_interleave_0, values = (x_39_cast_fp16, var_1428_cast_fp16))[name = string("input_57_cast_fp16")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_1423_to_fp16, x = input_57_cast_fp16)[name = string("normed_57_cast_fp16")]; + tensor var_1433_split_sizes_0 = const()[name = string("op_1433_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1433_axis_0 = const()[name = string("op_1433_axis_0"), val = int32(-1)]; + tensor var_1433_cast_fp16_0, tensor var_1433_cast_fp16_1 = split(axis = var_1433_axis_0, split_sizes = var_1433_split_sizes_0, x = normed_57_cast_fp16)[name = string("op_1433_cast_fp16")]; + tensor layers_2_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397166912)))]; + tensor attn_output_17_cast_fp16 = mul(x = var_1433_cast_fp16_0, y = layers_2_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor x_41_cast_fp16 = add(x = x_31_cast_fp16, y = attn_output_17_cast_fp16)[name = string("x_41_cast_fp16")]; + int32 var_1442 = const()[name = string("op_1442"), val = int32(-1)]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1444_cast_fp16 = mul(x = x_41_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1444_cast_fp16")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59_cast_fp16 = concat(axis = var_1442, 
interleave = input_59_interleave_0, values = (x_41_cast_fp16, var_1444_cast_fp16))[name = string("input_59_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_1439_to_fp16 = const()[name = string("op_1439_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_1439_to_fp16, x = input_59_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor var_1449_split_sizes_0 = const()[name = string("op_1449_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1449_axis_0 = const()[name = string("op_1449_axis_0"), val = int32(-1)]; + tensor var_1449_cast_fp16_0, tensor var_1449_cast_fp16_1 = split(axis = var_1449_axis_0, split_sizes = var_1449_split_sizes_0, x = normed_61_cast_fp16)[name = string("op_1449_cast_fp16")]; + tensor layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397172096)))]; + tensor h_15_cast_fp16 = mul(x = var_1449_cast_fp16_0, y = layers_2_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_15_cast_fp16")]; + tensor var_1460 = const()[name = string("op_1460"), val = tensor([0, 2, 1])]; + tensor input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor([2])]; + tensor var_1461 = transpose(perm = var_1460, x = h_15_cast_fp16)[name = string("transpose_72")]; + tensor input_61 = expand_dims(axes = input_61_axes_0, x = var_1461)[name = string("input_61")]; + string gate_9_pad_type_0 = const()[name = string("gate_9_pad_type_0"), val = string("valid")]; + tensor gate_9_strides_0 = const()[name = string("gate_9_strides_0"), val = tensor([1, 1])]; + tensor gate_9_pad_0 = const()[name = string("gate_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_9_dilations_0 = const()[name = string("gate_9_dilations_0"), val = 
tensor([1, 1])]; + int32 gate_9_groups_0 = const()[name = string("gate_9_groups_0"), val = int32(1)]; + tensor gate_9 = conv(dilations = gate_9_dilations_0, groups = gate_9_groups_0, pad = gate_9_pad_0, pad_type = gate_9_pad_type_0, strides = gate_9_strides_0, weight = layers_2_mlp_gate_proj_weight_palettized, x = input_61)[name = string("gate_9")]; + string up_5_pad_type_0 = const()[name = string("up_5_pad_type_0"), val = string("valid")]; + tensor up_5_strides_0 = const()[name = string("up_5_strides_0"), val = tensor([1, 1])]; + tensor up_5_pad_0 = const()[name = string("up_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_5_dilations_0 = const()[name = string("up_5_dilations_0"), val = tensor([1, 1])]; + int32 up_5_groups_0 = const()[name = string("up_5_groups_0"), val = int32(1)]; + tensor up_5 = conv(dilations = up_5_dilations_0, groups = up_5_groups_0, pad = up_5_pad_0, pad_type = up_5_pad_type_0, strides = up_5_strides_0, weight = layers_2_mlp_up_proj_weight_palettized, x = input_61)[name = string("up_5")]; + string gate_11_mode_0 = const()[name = string("gate_11_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_11 = gelu(mode = gate_11_mode_0, x = gate_9)[name = string("gate_11")]; + tensor input_63 = mul(x = gate_11, y = up_5)[name = string("input_63")]; + string mlp_out_5_pad_type_0 = const()[name = string("mlp_out_5_pad_type_0"), val = string("valid")]; + tensor mlp_out_5_strides_0 = const()[name = string("mlp_out_5_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_5_pad_0 = const()[name = string("mlp_out_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_5_dilations_0 = const()[name = string("mlp_out_5_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_5_groups_0 = const()[name = string("mlp_out_5_groups_0"), val = int32(1)]; + tensor mlp_out_5 = conv(dilations = mlp_out_5_dilations_0, groups = mlp_out_5_groups_0, pad = mlp_out_5_pad_0, pad_type = mlp_out_5_pad_type_0, strides = mlp_out_5_strides_0, weight = 
layers_2_mlp_down_proj_weight_palettized, x = input_63)[name = string("mlp_out_5")]; + tensor var_1501_axes_0 = const()[name = string("op_1501_axes_0"), val = tensor([2])]; + tensor var_1501 = squeeze(axes = var_1501_axes_0, x = mlp_out_5)[name = string("op_1501")]; + tensor var_1505 = const()[name = string("op_1505"), val = tensor([0, 2, 1])]; + int32 var_1511 = const()[name = string("op_1511"), val = int32(-1)]; + fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; + tensor x_43 = transpose(perm = var_1505, x = var_1501)[name = string("transpose_71")]; + tensor var_1513 = mul(x = x_43, y = const_21_promoted)[name = string("op_1513")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65 = concat(axis = var_1511, interleave = input_65_interleave_0, values = (x_43, var_1513))[name = string("input_65")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_1508_to_fp16 = const()[name = string("op_1508_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_1508_to_fp16, x = input_65)[name = string("normed_65_cast_fp16")]; + tensor var_1518_split_sizes_0 = const()[name = string("op_1518_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1518_axis_0 = const()[name = string("op_1518_axis_0"), val = int32(-1)]; + tensor var_1518_0, tensor var_1518_1 = split(axis = var_1518_axis_0, split_sizes = var_1518_split_sizes_0, x = normed_65_cast_fp16)[name = string("op_1518")]; + tensor hidden_states_23 = mul(x = var_1518_0, y = layers_2_post_feedforward_layernorm_weight)[name = string("hidden_states_23")]; + tensor hidden_states_25_cast_fp16 = add(x = x_41_cast_fp16, y = hidden_states_23)[name = string("hidden_states_25_cast_fp16")]; + tensor per_layer_slice_5_begin_0 = const()[name = string("per_layer_slice_5_begin_0"), val = tensor([0, 0, 8960])]; + tensor 
per_layer_slice_5_end_0 = const()[name = string("per_layer_slice_5_end_0"), val = tensor([1, 3, 9216])]; + tensor per_layer_slice_5_end_mask_0 = const()[name = string("per_layer_slice_5_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_5_cast_fp16 = slice_by_index(begin = per_layer_slice_5_begin_0, end = per_layer_slice_5_end_0, end_mask = per_layer_slice_5_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_5_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_1547 = transpose(perm = var_1546, x = hidden_states_25_cast_fp16)[name = string("transpose_70")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_1547)[name = string("input_67")]; + string gated_13_pad_type_0 = const()[name = string("gated_13_pad_type_0"), val = string("valid")]; + tensor gated_13_strides_0 = const()[name = string("gated_13_strides_0"), val = tensor([1, 1])]; + tensor gated_13_pad_0 = const()[name = string("gated_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_13_dilations_0 = const()[name = string("gated_13_dilations_0"), val = tensor([1, 1])]; + int32 gated_13_groups_0 = const()[name = string("gated_13_groups_0"), val = int32(1)]; + tensor gated_13 = conv(dilations = gated_13_dilations_0, groups = gated_13_groups_0, pad = gated_13_pad_0, pad_type = gated_13_pad_type_0, strides = gated_13_strides_0, weight = layers_2_per_layer_input_gate_weight_palettized, x = input_67)[name = string("gated_13")]; + string gated_15_mode_0 = const()[name = string("gated_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_15 = gelu(mode = gated_15_mode_0, x = gated_13)[name = string("gated_15")]; + tensor var_1566 = const()[name = string("op_1566"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_5_axes_0 = const()[name = string("per_layer_slice_conv_5_axes_0"), val = tensor([2])]; + 
tensor var_1567_cast_fp16 = transpose(perm = var_1566, x = per_layer_slice_5_cast_fp16)[name = string("transpose_69")]; + tensor per_layer_slice_conv_5_cast_fp16 = expand_dims(axes = per_layer_slice_conv_5_axes_0, x = var_1567_cast_fp16)[name = string("per_layer_slice_conv_5_cast_fp16")]; + tensor input_69_cast_fp16 = mul(x = gated_15, y = per_layer_slice_conv_5_cast_fp16)[name = string("input_69_cast_fp16")]; + string gated_17_pad_type_0 = const()[name = string("gated_17_pad_type_0"), val = string("valid")]; + tensor gated_17_strides_0 = const()[name = string("gated_17_strides_0"), val = tensor([1, 1])]; + tensor gated_17_pad_0 = const()[name = string("gated_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_17_dilations_0 = const()[name = string("gated_17_dilations_0"), val = tensor([1, 1])]; + int32 gated_17_groups_0 = const()[name = string("gated_17_groups_0"), val = int32(1)]; + tensor layers_2_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397177280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397505024))))[name = string("layers_2_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_17_cast_fp16 = conv(dilations = gated_17_dilations_0, groups = gated_17_groups_0, pad = gated_17_pad_0, pad_type = gated_17_pad_type_0, strides = gated_17_strides_0, weight = layers_2_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_69_cast_fp16)[name = string("gated_17_cast_fp16")]; + tensor var_1583_axes_0 = const()[name = string("op_1583_axes_0"), val = tensor([2])]; + tensor var_1583_cast_fp16 = squeeze(axes = var_1583_axes_0, x = gated_17_cast_fp16)[name = string("op_1583_cast_fp16")]; + tensor var_1587 = const()[name = string("op_1587"), val = tensor([0, 2, 1])]; + int32 var_1593 = const()[name = string("op_1593"), val = int32(-1)]; + fp16 
const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_45_cast_fp16 = transpose(perm = var_1587, x = var_1583_cast_fp16)[name = string("transpose_68")]; + tensor var_1595_cast_fp16 = mul(x = x_45_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1595_cast_fp16")]; + bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; + tensor input_71_cast_fp16 = concat(axis = var_1593, interleave = input_71_interleave_0, values = (x_45_cast_fp16, var_1595_cast_fp16))[name = string("input_71_cast_fp16")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_1590_to_fp16 = const()[name = string("op_1590_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_1590_to_fp16, x = input_71_cast_fp16)[name = string("normed_69_cast_fp16")]; + tensor var_1600_split_sizes_0 = const()[name = string("op_1600_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1600_axis_0 = const()[name = string("op_1600_axis_0"), val = int32(-1)]; + tensor var_1600_cast_fp16_0, tensor var_1600_cast_fp16_1 = split(axis = var_1600_axis_0, split_sizes = var_1600_split_sizes_0, x = normed_69_cast_fp16)[name = string("op_1600_cast_fp16")]; + tensor layers_2_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_2_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397507648)))]; + tensor hidden_states_29_cast_fp16 = mul(x = var_1600_cast_fp16_0, y = layers_2_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; + tensor const_23_promoted_to_fp16 = const()[name = 
string("const_23_promoted_to_fp16"), val = tensor([0x1.acp-1])]; + tensor x_47_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_23_promoted_to_fp16)[name = string("x_47_cast_fp16")]; + int32 var_1615 = const()[name = string("op_1615"), val = int32(-1)]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1617_cast_fp16 = mul(x = x_47_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1617_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_1615, interleave = input_73_interleave_0, values = (x_47_cast_fp16, var_1617_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_1612_to_fp16 = const()[name = string("op_1612_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_1612_to_fp16, x = input_73_cast_fp16)[name = string("normed_73_cast_fp16")]; + tensor var_1622_split_sizes_0 = const()[name = string("op_1622_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1622_axis_0 = const()[name = string("op_1622_axis_0"), val = int32(-1)]; + tensor var_1622_cast_fp16_0, tensor var_1622_cast_fp16_1 = split(axis = var_1622_axis_0, split_sizes = var_1622_split_sizes_0, x = normed_73_cast_fp16)[name = string("op_1622_cast_fp16")]; + tensor layers_3_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397512832)))]; + tensor h_19_cast_fp16 = mul(x = var_1622_cast_fp16_0, y = layers_3_input_layernorm_weight_promoted_to_fp16)[name = string("h_19_cast_fp16")]; + tensor var_1628 = const()[name = string("op_1628"), val = tensor([0, 2, 1])]; + tensor var_1631_axes_0 = const()[name = 
string("op_1631_axes_0"), val = tensor([2])]; + tensor var_1629_cast_fp16 = transpose(perm = var_1628, x = h_19_cast_fp16)[name = string("transpose_67")]; + tensor var_1631_cast_fp16 = expand_dims(axes = var_1631_axes_0, x = var_1629_cast_fp16)[name = string("op_1631_cast_fp16")]; + string q_31_pad_type_0 = const()[name = string("q_31_pad_type_0"), val = string("valid")]; + tensor q_31_strides_0 = const()[name = string("q_31_strides_0"), val = tensor([1, 1])]; + tensor q_31_pad_0 = const()[name = string("q_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_31_dilations_0 = const()[name = string("q_31_dilations_0"), val = tensor([1, 1])]; + int32 q_31_groups_0 = const()[name = string("q_31_groups_0"), val = int32(1)]; + tensor q_31 = conv(dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = layers_3_self_attn_q_proj_weight_palettized, x = var_1631_cast_fp16)[name = string("q_31")]; + tensor var_1652 = const()[name = string("op_1652"), val = tensor([1, 8, 256, 3])]; + tensor var_1653 = reshape(shape = var_1652, x = q_31)[name = string("op_1653")]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_1676 = const()[name = string("op_1676"), val = tensor([3, 8, 256])]; + tensor transpose_42 = transpose(perm = transpose_42_perm_0, x = var_1653)[name = string("transpose_66")]; + tensor x_49 = reshape(shape = var_1676, x = transpose_42)[name = string("x_49")]; + int32 var_1682 = const()[name = string("op_1682"), val = int32(-1)]; + fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; + tensor var_1684 = mul(x = x_49, y = const_25_promoted)[name = string("op_1684")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_1682, interleave = input_77_interleave_0, values = (x_49, var_1684))[name = 
string("input_77")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_1679_to_fp16 = const()[name = string("op_1679_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_1679_to_fp16, x = input_77)[name = string("normed_77_cast_fp16")]; + tensor var_1689_split_sizes_0 = const()[name = string("op_1689_split_sizes_0"), val = tensor([256, 256])]; + int32 var_1689_axis_0 = const()[name = string("op_1689_axis_0"), val = int32(-1)]; + tensor var_1689_0, tensor var_1689_1 = split(axis = var_1689_axis_0, split_sizes = var_1689_split_sizes_0, x = normed_77_cast_fp16)[name = string("op_1689")]; + tensor q_35 = mul(x = var_1689_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_35")]; + tensor var_1696 = const()[name = string("op_1696"), val = tensor([1, 3, 8, 256])]; + tensor var_1697 = reshape(shape = var_1696, x = q_35)[name = string("op_1697")]; + tensor var_1702 = const()[name = string("op_1702"), val = tensor([0, 2, 1, 3])]; + tensor q_37 = transpose(perm = var_1702, x = var_1697)[name = string("transpose_65")]; + tensor var_1704_cast_fp16 = mul(x = q_37, y = cos_s)[name = string("op_1704_cast_fp16")]; + tensor var_1705_split_sizes_0 = const()[name = string("op_1705_split_sizes_0"), val = tensor([128, 128])]; + int32 var_1705_axis_0 = const()[name = string("op_1705_axis_0"), val = int32(-1)]; + tensor var_1705_0, tensor var_1705_1 = split(axis = var_1705_axis_0, split_sizes = var_1705_split_sizes_0, x = q_37)[name = string("op_1705")]; + fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; + tensor var_1707 = mul(x = var_1705_1, y = const_26_promoted)[name = string("op_1707")]; + int32 var_1709 = const()[name = string("op_1709"), val = int32(-1)]; + bool var_1710_interleave_0 = const()[name = string("op_1710_interleave_0"), val = bool(false)]; + tensor var_1710 = concat(axis = var_1709, interleave = 
var_1710_interleave_0, values = (var_1707, var_1705_0))[name = string("op_1710")]; + tensor var_1711_cast_fp16 = mul(x = var_1710, y = sin_s)[name = string("op_1711_cast_fp16")]; + tensor q_39_cast_fp16 = add(x = var_1704_cast_fp16, y = var_1711_cast_fp16)[name = string("q_39_cast_fp16")]; + bool attn_weights_13_transpose_x_0 = const()[name = string("attn_weights_13_transpose_x_0"), val = bool(false)]; + bool attn_weights_13_transpose_y_0 = const()[name = string("attn_weights_13_transpose_y_0"), val = bool(false)]; + tensor attn_weights_13_cast_fp16 = matmul(transpose_x = attn_weights_13_transpose_x_0, transpose_y = attn_weights_13_transpose_y_0, x = q_39_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask_sliding)[name = string("x_51_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_51_cast_fp16)[name = string("reduce_max_3")]; + tensor var_1743 = sub(x = x_51_cast_fp16, y = reduce_max_3)[name = string("op_1743")]; + tensor var_1749 = exp(x = var_1743)[name = string("op_1749")]; + tensor var_1759_axes_0 = const()[name = string("op_1759_axes_0"), val = tensor([-1])]; + bool var_1759_keep_dims_0 = const()[name = string("op_1759_keep_dims_0"), val = bool(true)]; + tensor var_1759 = reduce_sum(axes = var_1759_axes_0, keep_dims = var_1759_keep_dims_0, x = var_1749)[name = string("op_1759")]; + tensor var_1765_cast_fp16 = real_div(x = var_1749, y = var_1759)[name = string("op_1765_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = 
string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = var_1765_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_1776 = const()[name = string("op_1776"), val = tensor([0, 2, 1, 3])]; + tensor var_1783 = const()[name = string("op_1783"), val = tensor([1, 3, -1])]; + tensor var_1777_cast_fp16 = transpose(perm = var_1776, x = attn_output_19_cast_fp16)[name = string("transpose_64")]; + tensor attn_output_21_cast_fp16 = reshape(shape = var_1783, x = var_1777_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1788 = const()[name = string("op_1788"), val = tensor([0, 2, 1])]; + string var_1804_pad_type_0 = const()[name = string("op_1804_pad_type_0"), val = string("valid")]; + int32 var_1804_groups_0 = const()[name = string("op_1804_groups_0"), val = int32(1)]; + tensor var_1804_strides_0 = const()[name = string("op_1804_strides_0"), val = tensor([1])]; + tensor var_1804_pad_0 = const()[name = string("op_1804_pad_0"), val = tensor([0, 0])]; + tensor var_1804_dilations_0 = const()[name = string("op_1804_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397518016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400139520))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1789_cast_fp16 = transpose(perm = var_1788, x = attn_output_21_cast_fp16)[name = string("transpose_63")]; + tensor var_1804_cast_fp16 = conv(dilations = var_1804_dilations_0, groups = var_1804_groups_0, pad = var_1804_pad_0, pad_type = var_1804_pad_type_0, strides = var_1804_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_1789_cast_fp16)[name = 
string("op_1804_cast_fp16")]; + tensor var_1808 = const()[name = string("op_1808"), val = tensor([0, 2, 1])]; + int32 var_1814 = const()[name = string("op_1814"), val = int32(-1)]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_55_cast_fp16 = transpose(perm = var_1808, x = var_1804_cast_fp16)[name = string("transpose_62")]; + tensor var_1816_cast_fp16 = mul(x = x_55_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_1816_cast_fp16")]; + bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; + tensor input_81_cast_fp16 = concat(axis = var_1814, interleave = input_81_interleave_0, values = (x_55_cast_fp16, var_1816_cast_fp16))[name = string("input_81_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_1811_to_fp16 = const()[name = string("op_1811_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_1811_to_fp16, x = input_81_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor var_1821_split_sizes_0 = const()[name = string("op_1821_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1821_axis_0 = const()[name = string("op_1821_axis_0"), val = int32(-1)]; + tensor var_1821_cast_fp16_0, tensor var_1821_cast_fp16_1 = split(axis = var_1821_axis_0, split_sizes = var_1821_split_sizes_0, x = normed_81_cast_fp16)[name = string("op_1821_cast_fp16")]; + tensor layers_3_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400142144)))]; + tensor attn_output_23_cast_fp16 = mul(x = var_1821_cast_fp16_0, y = layers_3_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = 
x_47_cast_fp16, y = attn_output_23_cast_fp16)[name = string("x_57_cast_fp16")]; + int32 var_1830 = const()[name = string("op_1830"), val = int32(-1)]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1832_cast_fp16 = mul(x = x_57_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_1832_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_1830, interleave = input_83_interleave_0, values = (x_57_cast_fp16, var_1832_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_1827_to_fp16, x = input_83_cast_fp16)[name = string("normed_85_cast_fp16")]; + tensor var_1837_split_sizes_0 = const()[name = string("op_1837_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1837_axis_0 = const()[name = string("op_1837_axis_0"), val = int32(-1)]; + tensor var_1837_cast_fp16_0, tensor var_1837_cast_fp16_1 = split(axis = var_1837_axis_0, split_sizes = var_1837_split_sizes_0, x = normed_85_cast_fp16)[name = string("op_1837_cast_fp16")]; + tensor layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400147328)))]; + tensor h_21_cast_fp16 = mul(x = var_1837_cast_fp16_0, y = layers_3_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_21_cast_fp16")]; + tensor var_1848 = const()[name = string("op_1848"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_1849 = transpose(perm 
= var_1848, x = h_21_cast_fp16)[name = string("transpose_61")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_1849)[name = string("input_85")]; + string gate_13_pad_type_0 = const()[name = string("gate_13_pad_type_0"), val = string("valid")]; + tensor gate_13_strides_0 = const()[name = string("gate_13_strides_0"), val = tensor([1, 1])]; + tensor gate_13_pad_0 = const()[name = string("gate_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_13_dilations_0 = const()[name = string("gate_13_dilations_0"), val = tensor([1, 1])]; + int32 gate_13_groups_0 = const()[name = string("gate_13_groups_0"), val = int32(1)]; + tensor gate_13 = conv(dilations = gate_13_dilations_0, groups = gate_13_groups_0, pad = gate_13_pad_0, pad_type = gate_13_pad_type_0, strides = gate_13_strides_0, weight = layers_3_mlp_gate_proj_weight_palettized, x = input_85)[name = string("gate_13")]; + string up_7_pad_type_0 = const()[name = string("up_7_pad_type_0"), val = string("valid")]; + tensor up_7_strides_0 = const()[name = string("up_7_strides_0"), val = tensor([1, 1])]; + tensor up_7_pad_0 = const()[name = string("up_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_7_dilations_0 = const()[name = string("up_7_dilations_0"), val = tensor([1, 1])]; + int32 up_7_groups_0 = const()[name = string("up_7_groups_0"), val = int32(1)]; + tensor up_7 = conv(dilations = up_7_dilations_0, groups = up_7_groups_0, pad = up_7_pad_0, pad_type = up_7_pad_type_0, strides = up_7_strides_0, weight = layers_3_mlp_up_proj_weight_palettized, x = input_85)[name = string("up_7")]; + string gate_15_mode_0 = const()[name = string("gate_15_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_15 = gelu(mode = gate_15_mode_0, x = gate_13)[name = string("gate_15")]; + tensor input_87 = mul(x = gate_15, y = up_7)[name = string("input_87")]; + string mlp_out_7_pad_type_0 = const()[name = string("mlp_out_7_pad_type_0"), val = string("valid")]; + tensor mlp_out_7_strides_0 = const()[name = 
string("mlp_out_7_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_7_pad_0 = const()[name = string("mlp_out_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_7_dilations_0 = const()[name = string("mlp_out_7_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_7_groups_0 = const()[name = string("mlp_out_7_groups_0"), val = int32(1)]; + tensor mlp_out_7 = conv(dilations = mlp_out_7_dilations_0, groups = mlp_out_7_groups_0, pad = mlp_out_7_pad_0, pad_type = mlp_out_7_pad_type_0, strides = mlp_out_7_strides_0, weight = layers_3_mlp_down_proj_weight_palettized, x = input_87)[name = string("mlp_out_7")]; + tensor var_1889_axes_0 = const()[name = string("op_1889_axes_0"), val = tensor([2])]; + tensor var_1889 = squeeze(axes = var_1889_axes_0, x = mlp_out_7)[name = string("op_1889")]; + tensor var_1893 = const()[name = string("op_1893"), val = tensor([0, 2, 1])]; + int32 var_1899 = const()[name = string("op_1899"), val = int32(-1)]; + fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; + tensor x_59 = transpose(perm = var_1893, x = var_1889)[name = string("transpose_60")]; + tensor var_1901 = mul(x = x_59, y = const_29_promoted)[name = string("op_1901")]; + bool input_89_interleave_0 = const()[name = string("input_89_interleave_0"), val = bool(false)]; + tensor input_89 = concat(axis = var_1899, interleave = input_89_interleave_0, values = (x_59, var_1901))[name = string("input_89")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_1896_to_fp16 = const()[name = string("op_1896_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_1896_to_fp16, x = input_89)[name = string("normed_89_cast_fp16")]; + tensor var_1906_split_sizes_0 = const()[name = string("op_1906_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1906_axis_0 = const()[name = string("op_1906_axis_0"), val = int32(-1)]; + tensor var_1906_0, 
tensor var_1906_1 = split(axis = var_1906_axis_0, split_sizes = var_1906_split_sizes_0, x = normed_89_cast_fp16)[name = string("op_1906")]; + tensor hidden_states_33 = mul(x = var_1906_0, y = layers_3_post_feedforward_layernorm_weight)[name = string("hidden_states_33")]; + tensor hidden_states_35_cast_fp16 = add(x = x_57_cast_fp16, y = hidden_states_33)[name = string("hidden_states_35_cast_fp16")]; + tensor per_layer_slice_7_begin_0 = const()[name = string("per_layer_slice_7_begin_0"), val = tensor([0, 0, 9216])]; + tensor per_layer_slice_7_end_0 = const()[name = string("per_layer_slice_7_end_0"), val = tensor([1, 3, 9472])]; + tensor per_layer_slice_7_end_mask_0 = const()[name = string("per_layer_slice_7_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_7_cast_fp16 = slice_by_index(begin = per_layer_slice_7_begin_0, end = per_layer_slice_7_end_0, end_mask = per_layer_slice_7_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_7_cast_fp16")]; + tensor var_1934 = const()[name = string("op_1934"), val = tensor([0, 2, 1])]; + tensor input_91_axes_0 = const()[name = string("input_91_axes_0"), val = tensor([2])]; + tensor var_1935 = transpose(perm = var_1934, x = hidden_states_35_cast_fp16)[name = string("transpose_59")]; + tensor input_91 = expand_dims(axes = input_91_axes_0, x = var_1935)[name = string("input_91")]; + string gated_19_pad_type_0 = const()[name = string("gated_19_pad_type_0"), val = string("valid")]; + tensor gated_19_strides_0 = const()[name = string("gated_19_strides_0"), val = tensor([1, 1])]; + tensor gated_19_pad_0 = const()[name = string("gated_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_19_dilations_0 = const()[name = string("gated_19_dilations_0"), val = tensor([1, 1])]; + int32 gated_19_groups_0 = const()[name = string("gated_19_groups_0"), val = int32(1)]; + tensor gated_19 = conv(dilations = gated_19_dilations_0, groups = gated_19_groups_0, pad = gated_19_pad_0, pad_type = 
gated_19_pad_type_0, strides = gated_19_strides_0, weight = layers_3_per_layer_input_gate_weight_palettized, x = input_91)[name = string("gated_19")]; + string gated_21_mode_0 = const()[name = string("gated_21_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_21 = gelu(mode = gated_21_mode_0, x = gated_19)[name = string("gated_21")]; + tensor var_1954 = const()[name = string("op_1954"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_7_axes_0 = const()[name = string("per_layer_slice_conv_7_axes_0"), val = tensor([2])]; + tensor var_1955_cast_fp16 = transpose(perm = var_1954, x = per_layer_slice_7_cast_fp16)[name = string("transpose_58")]; + tensor per_layer_slice_conv_7_cast_fp16 = expand_dims(axes = per_layer_slice_conv_7_axes_0, x = var_1955_cast_fp16)[name = string("per_layer_slice_conv_7_cast_fp16")]; + tensor input_93_cast_fp16 = mul(x = gated_21, y = per_layer_slice_conv_7_cast_fp16)[name = string("input_93_cast_fp16")]; + string gated_23_pad_type_0 = const()[name = string("gated_23_pad_type_0"), val = string("valid")]; + tensor gated_23_strides_0 = const()[name = string("gated_23_strides_0"), val = tensor([1, 1])]; + tensor gated_23_pad_0 = const()[name = string("gated_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_23_dilations_0 = const()[name = string("gated_23_dilations_0"), val = tensor([1, 1])]; + int32 gated_23_groups_0 = const()[name = string("gated_23_groups_0"), val = int32(1)]; + tensor layers_3_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400152512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400480256))))[name = string("layers_3_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_23_cast_fp16 = conv(dilations = gated_23_dilations_0, groups = gated_23_groups_0, pad = gated_23_pad_0, pad_type = gated_23_pad_type_0, strides = 
gated_23_strides_0, weight = layers_3_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_93_cast_fp16)[name = string("gated_23_cast_fp16")]; + tensor var_1971_axes_0 = const()[name = string("op_1971_axes_0"), val = tensor([2])]; + tensor var_1971_cast_fp16 = squeeze(axes = var_1971_axes_0, x = gated_23_cast_fp16)[name = string("op_1971_cast_fp16")]; + tensor var_1975 = const()[name = string("op_1975"), val = tensor([0, 2, 1])]; + int32 var_1981 = const()[name = string("op_1981"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_61_cast_fp16 = transpose(perm = var_1975, x = var_1971_cast_fp16)[name = string("transpose_57")]; + tensor var_1983_cast_fp16 = mul(x = x_61_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_1983_cast_fp16")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95_cast_fp16 = concat(axis = var_1981, interleave = input_95_interleave_0, values = (x_61_cast_fp16, var_1983_cast_fp16))[name = string("input_95_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_1978_to_fp16 = const()[name = string("op_1978_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_1978_to_fp16, x = input_95_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor var_1988_split_sizes_0 = const()[name = string("op_1988_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_1988_axis_0 = const()[name = string("op_1988_axis_0"), val = int32(-1)]; + tensor var_1988_cast_fp16_0, tensor var_1988_cast_fp16_1 = split(axis = var_1988_axis_0, split_sizes = var_1988_split_sizes_0, x = normed_93_cast_fp16)[name = string("op_1988_cast_fp16")]; + tensor layers_3_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_3_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400482880)))]; + tensor hidden_states_39_cast_fp16 = mul(x = var_1988_cast_fp16_0, y = layers_3_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_39_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = tensor([0x1.b6p-1])]; + tensor x_63_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_31_promoted_to_fp16)[name = string("x_63_cast_fp16")]; + int32 var_2003 = const()[name = string("op_2003"), val = int32(-1)]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2005_cast_fp16 = mul(x = x_63_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2005_cast_fp16")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97_cast_fp16 = concat(axis = var_2003, interleave = input_97_interleave_0, values = (x_63_cast_fp16, var_2005_cast_fp16))[name = string("input_97_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_2000_to_fp16 = const()[name = string("op_2000_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_2000_to_fp16, x = input_97_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor var_2010_split_sizes_0 = const()[name = string("op_2010_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2010_axis_0 = const()[name = string("op_2010_axis_0"), val = int32(-1)]; + tensor var_2010_cast_fp16_0, tensor var_2010_cast_fp16_1 = split(axis = var_2010_axis_0, split_sizes = var_2010_split_sizes_0, x = 
normed_97_cast_fp16)[name = string("op_2010_cast_fp16")]; + tensor layers_4_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400488064)))]; + tensor h_25_cast_fp16 = mul(x = var_2010_cast_fp16_0, y = layers_4_input_layernorm_weight_promoted_to_fp16)[name = string("h_25_cast_fp16")]; + tensor var_2016 = const()[name = string("op_2016"), val = tensor([0, 2, 1])]; + tensor var_2019_axes_0 = const()[name = string("op_2019_axes_0"), val = tensor([2])]; + tensor var_2017_cast_fp16 = transpose(perm = var_2016, x = h_25_cast_fp16)[name = string("transpose_56")]; + tensor var_2019_cast_fp16 = expand_dims(axes = var_2019_axes_0, x = var_2017_cast_fp16)[name = string("op_2019_cast_fp16")]; + string q_41_pad_type_0 = const()[name = string("q_41_pad_type_0"), val = string("valid")]; + tensor q_41_strides_0 = const()[name = string("q_41_strides_0"), val = tensor([1, 1])]; + tensor q_41_pad_0 = const()[name = string("q_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_41_dilations_0 = const()[name = string("q_41_dilations_0"), val = tensor([1, 1])]; + int32 q_41_groups_0 = const()[name = string("q_41_groups_0"), val = int32(1)]; + tensor q_41 = conv(dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = layers_4_self_attn_q_proj_weight_palettized, x = var_2019_cast_fp16)[name = string("q_41")]; + tensor var_2040 = const()[name = string("op_2040"), val = tensor([1, 8, 256, 3])]; + tensor var_2041 = reshape(shape = var_2040, x = q_41)[name = string("op_2041")]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2064 = const()[name = string("op_2064"), val = tensor([3, 8, 256])]; + tensor transpose_44 = transpose(perm = transpose_44_perm_0, x = var_2041)[name = 
string("transpose_55")]; + tensor x_65 = reshape(shape = var_2064, x = transpose_44)[name = string("x_65")]; + int32 var_2070 = const()[name = string("op_2070"), val = int32(-1)]; + fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; + tensor var_2072 = mul(x = x_65, y = const_33_promoted)[name = string("op_2072")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101 = concat(axis = var_2070, interleave = input_101_interleave_0, values = (x_65, var_2072))[name = string("input_101")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_2067_to_fp16, x = input_101)[name = string("normed_101_cast_fp16")]; + tensor var_2077_split_sizes_0 = const()[name = string("op_2077_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2077_axis_0 = const()[name = string("op_2077_axis_0"), val = int32(-1)]; + tensor var_2077_0, tensor var_2077_1 = split(axis = var_2077_axis_0, split_sizes = var_2077_split_sizes_0, x = normed_101_cast_fp16)[name = string("op_2077")]; + tensor q_45 = mul(x = var_2077_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_45")]; + tensor var_2084 = const()[name = string("op_2084"), val = tensor([1, 3, 8, 256])]; + tensor var_2085 = reshape(shape = var_2084, x = q_45)[name = string("op_2085")]; + tensor var_2090 = const()[name = string("op_2090"), val = tensor([0, 2, 1, 3])]; + tensor q_47 = transpose(perm = var_2090, x = var_2085)[name = string("transpose_54")]; + tensor var_2092_cast_fp16 = mul(x = q_47, y = cos_s)[name = string("op_2092_cast_fp16")]; + tensor var_2093_split_sizes_0 = const()[name = string("op_2093_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2093_axis_0 = const()[name = string("op_2093_axis_0"), val 
= int32(-1)]; + tensor var_2093_0, tensor var_2093_1 = split(axis = var_2093_axis_0, split_sizes = var_2093_split_sizes_0, x = q_47)[name = string("op_2093")]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor var_2095 = mul(x = var_2093_1, y = const_34_promoted)[name = string("op_2095")]; + int32 var_2097 = const()[name = string("op_2097"), val = int32(-1)]; + bool var_2098_interleave_0 = const()[name = string("op_2098_interleave_0"), val = bool(false)]; + tensor var_2098 = concat(axis = var_2097, interleave = var_2098_interleave_0, values = (var_2095, var_2093_0))[name = string("op_2098")]; + tensor var_2099_cast_fp16 = mul(x = var_2098, y = sin_s)[name = string("op_2099_cast_fp16")]; + tensor q_49_cast_fp16 = add(x = var_2092_cast_fp16, y = var_2099_cast_fp16)[name = string("q_49_cast_fp16")]; + bool attn_weights_17_transpose_x_0 = const()[name = string("attn_weights_17_transpose_x_0"), val = bool(false)]; + bool attn_weights_17_transpose_y_0 = const()[name = string("attn_weights_17_transpose_y_0"), val = bool(false)]; + tensor attn_weights_17_cast_fp16 = matmul(transpose_x = attn_weights_17_transpose_x_0, transpose_y = attn_weights_17_transpose_y_0, x = q_49_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask_sliding)[name = string("x_67_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_67_cast_fp16)[name = string("reduce_max_4")]; + tensor var_2131 = sub(x = x_67_cast_fp16, y = reduce_max_4)[name = string("op_2131")]; + tensor var_2137 = exp(x = var_2131)[name = string("op_2137")]; + tensor var_2147_axes_0 = const()[name = 
string("op_2147_axes_0"), val = tensor([-1])]; + bool var_2147_keep_dims_0 = const()[name = string("op_2147_keep_dims_0"), val = bool(true)]; + tensor var_2147 = reduce_sum(axes = var_2147_axes_0, keep_dims = var_2147_keep_dims_0, x = var_2137)[name = string("op_2147")]; + tensor var_2153_cast_fp16 = real_div(x = var_2137, y = var_2147)[name = string("op_2153_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_2153_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2164 = const()[name = string("op_2164"), val = tensor([0, 2, 1, 3])]; + tensor var_2171 = const()[name = string("op_2171"), val = tensor([1, 3, -1])]; + tensor var_2165_cast_fp16 = transpose(perm = var_2164, x = attn_output_25_cast_fp16)[name = string("transpose_53")]; + tensor attn_output_27_cast_fp16 = reshape(shape = var_2171, x = var_2165_cast_fp16)[name = string("attn_output_27_cast_fp16")]; + tensor var_2176 = const()[name = string("op_2176"), val = tensor([0, 2, 1])]; + string var_2192_pad_type_0 = const()[name = string("op_2192_pad_type_0"), val = string("valid")]; + int32 var_2192_groups_0 = const()[name = string("op_2192_groups_0"), val = int32(1)]; + tensor var_2192_strides_0 = const()[name = string("op_2192_strides_0"), val = tensor([1])]; + tensor var_2192_pad_0 = const()[name = string("op_2192_pad_0"), val = tensor([0, 0])]; + tensor var_2192_dilations_0 = const()[name = string("op_2192_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400493248))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403114752))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2177_cast_fp16 = transpose(perm = var_2176, x = attn_output_27_cast_fp16)[name = string("transpose_52")]; + tensor var_2192_cast_fp16 = conv(dilations = var_2192_dilations_0, groups = var_2192_groups_0, pad = var_2192_pad_0, pad_type = var_2192_pad_type_0, strides = var_2192_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_2177_cast_fp16)[name = string("op_2192_cast_fp16")]; + tensor var_2196 = const()[name = string("op_2196"), val = tensor([0, 2, 1])]; + int32 var_2202 = const()[name = string("op_2202"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_71_cast_fp16 = transpose(perm = var_2196, x = var_2192_cast_fp16)[name = string("transpose_51")]; + tensor var_2204_cast_fp16 = mul(x = x_71_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2204_cast_fp16")]; + bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; + tensor input_105_cast_fp16 = concat(axis = var_2202, interleave = input_105_interleave_0, values = (x_71_cast_fp16, var_2204_cast_fp16))[name = string("input_105_cast_fp16")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_2199_to_fp16 = const()[name = string("op_2199_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_2199_to_fp16, x = input_105_cast_fp16)[name = string("normed_105_cast_fp16")]; + tensor var_2209_split_sizes_0 = const()[name = string("op_2209_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2209_axis_0 = const()[name = string("op_2209_axis_0"), val = int32(-1)]; + tensor var_2209_cast_fp16_0, tensor var_2209_cast_fp16_1 = split(axis = var_2209_axis_0, split_sizes = 
var_2209_split_sizes_0, x = normed_105_cast_fp16)[name = string("op_2209_cast_fp16")]; + tensor layers_4_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403117376)))]; + tensor attn_output_29_cast_fp16 = mul(x = var_2209_cast_fp16_0, y = layers_4_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_63_cast_fp16, y = attn_output_29_cast_fp16)[name = string("x_73_cast_fp16")]; + int32 var_2218 = const()[name = string("op_2218"), val = int32(-1)]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2220_cast_fp16 = mul(x = x_73_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2220_cast_fp16")]; + bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; + tensor input_107_cast_fp16 = concat(axis = var_2218, interleave = input_107_interleave_0, values = (x_73_cast_fp16, var_2220_cast_fp16))[name = string("input_107_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_2215_to_fp16 = const()[name = string("op_2215_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_2215_to_fp16, x = input_107_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor var_2225_split_sizes_0 = const()[name = string("op_2225_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2225_axis_0 = const()[name = string("op_2225_axis_0"), val = int32(-1)]; + tensor var_2225_cast_fp16_0, tensor var_2225_cast_fp16_1 = split(axis = var_2225_axis_0, split_sizes = var_2225_split_sizes_0, x = normed_109_cast_fp16)[name = string("op_2225_cast_fp16")]; + tensor 
layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403122560)))]; + tensor h_27_cast_fp16 = mul(x = var_2225_cast_fp16_0, y = layers_4_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_27_cast_fp16")]; + tensor var_2236 = const()[name = string("op_2236"), val = tensor([0, 2, 1])]; + tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([2])]; + tensor var_2237 = transpose(perm = var_2236, x = h_27_cast_fp16)[name = string("transpose_50")]; + tensor input_109 = expand_dims(axes = input_109_axes_0, x = var_2237)[name = string("input_109")]; + string gate_17_pad_type_0 = const()[name = string("gate_17_pad_type_0"), val = string("valid")]; + tensor gate_17_strides_0 = const()[name = string("gate_17_strides_0"), val = tensor([1, 1])]; + tensor gate_17_pad_0 = const()[name = string("gate_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_17_dilations_0 = const()[name = string("gate_17_dilations_0"), val = tensor([1, 1])]; + int32 gate_17_groups_0 = const()[name = string("gate_17_groups_0"), val = int32(1)]; + tensor gate_17 = conv(dilations = gate_17_dilations_0, groups = gate_17_groups_0, pad = gate_17_pad_0, pad_type = gate_17_pad_type_0, strides = gate_17_strides_0, weight = layers_4_mlp_gate_proj_weight_palettized, x = input_109)[name = string("gate_17")]; + string up_9_pad_type_0 = const()[name = string("up_9_pad_type_0"), val = string("valid")]; + tensor up_9_strides_0 = const()[name = string("up_9_strides_0"), val = tensor([1, 1])]; + tensor up_9_pad_0 = const()[name = string("up_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_9_dilations_0 = const()[name = string("up_9_dilations_0"), val = tensor([1, 1])]; + int32 up_9_groups_0 = const()[name = string("up_9_groups_0"), val = int32(1)]; + tensor up_9 = conv(dilations = 
up_9_dilations_0, groups = up_9_groups_0, pad = up_9_pad_0, pad_type = up_9_pad_type_0, strides = up_9_strides_0, weight = layers_4_mlp_up_proj_weight_palettized, x = input_109)[name = string("up_9")]; + string gate_19_mode_0 = const()[name = string("gate_19_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_19 = gelu(mode = gate_19_mode_0, x = gate_17)[name = string("gate_19")]; + tensor input_111 = mul(x = gate_19, y = up_9)[name = string("input_111")]; + string mlp_out_9_pad_type_0 = const()[name = string("mlp_out_9_pad_type_0"), val = string("valid")]; + tensor mlp_out_9_strides_0 = const()[name = string("mlp_out_9_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_9_pad_0 = const()[name = string("mlp_out_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_9_dilations_0 = const()[name = string("mlp_out_9_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_9_groups_0 = const()[name = string("mlp_out_9_groups_0"), val = int32(1)]; + tensor mlp_out_9 = conv(dilations = mlp_out_9_dilations_0, groups = mlp_out_9_groups_0, pad = mlp_out_9_pad_0, pad_type = mlp_out_9_pad_type_0, strides = mlp_out_9_strides_0, weight = layers_4_mlp_down_proj_weight_palettized, x = input_111)[name = string("mlp_out_9")]; + tensor var_2277_axes_0 = const()[name = string("op_2277_axes_0"), val = tensor([2])]; + tensor var_2277 = squeeze(axes = var_2277_axes_0, x = mlp_out_9)[name = string("op_2277")]; + tensor var_2281 = const()[name = string("op_2281"), val = tensor([0, 2, 1])]; + int32 var_2287 = const()[name = string("op_2287"), val = int32(-1)]; + fp16 const_37_promoted = const()[name = string("const_37_promoted"), val = fp16(-0x1p+0)]; + tensor x_75 = transpose(perm = var_2281, x = var_2277)[name = string("transpose_49")]; + tensor var_2289 = mul(x = x_75, y = const_37_promoted)[name = string("op_2289")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_2287, interleave = 
input_113_interleave_0, values = (x_75, var_2289))[name = string("input_113")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_2284_to_fp16 = const()[name = string("op_2284_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_2284_to_fp16, x = input_113)[name = string("normed_113_cast_fp16")]; + tensor var_2294_split_sizes_0 = const()[name = string("op_2294_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2294_axis_0 = const()[name = string("op_2294_axis_0"), val = int32(-1)]; + tensor var_2294_0, tensor var_2294_1 = split(axis = var_2294_axis_0, split_sizes = var_2294_split_sizes_0, x = normed_113_cast_fp16)[name = string("op_2294")]; + tensor hidden_states_43 = mul(x = var_2294_0, y = layers_4_post_feedforward_layernorm_weight)[name = string("hidden_states_43")]; + tensor hidden_states_45_cast_fp16 = add(x = x_73_cast_fp16, y = hidden_states_43)[name = string("hidden_states_45_cast_fp16")]; + tensor per_layer_slice_9_begin_0 = const()[name = string("per_layer_slice_9_begin_0"), val = tensor([0, 0, 9472])]; + tensor per_layer_slice_9_end_0 = const()[name = string("per_layer_slice_9_end_0"), val = tensor([1, 3, 9728])]; + tensor per_layer_slice_9_end_mask_0 = const()[name = string("per_layer_slice_9_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_9_cast_fp16 = slice_by_index(begin = per_layer_slice_9_begin_0, end = per_layer_slice_9_end_0, end_mask = per_layer_slice_9_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_9_cast_fp16")]; + tensor var_2322 = const()[name = string("op_2322"), val = tensor([0, 2, 1])]; + tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; + tensor var_2323 = transpose(perm = var_2322, x = hidden_states_45_cast_fp16)[name = string("transpose_48")]; + tensor input_115 = expand_dims(axes = input_115_axes_0, x = var_2323)[name = 
string("input_115")]; + string gated_25_pad_type_0 = const()[name = string("gated_25_pad_type_0"), val = string("valid")]; + tensor gated_25_strides_0 = const()[name = string("gated_25_strides_0"), val = tensor([1, 1])]; + tensor gated_25_pad_0 = const()[name = string("gated_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_25_dilations_0 = const()[name = string("gated_25_dilations_0"), val = tensor([1, 1])]; + int32 gated_25_groups_0 = const()[name = string("gated_25_groups_0"), val = int32(1)]; + tensor gated_25 = conv(dilations = gated_25_dilations_0, groups = gated_25_groups_0, pad = gated_25_pad_0, pad_type = gated_25_pad_type_0, strides = gated_25_strides_0, weight = layers_4_per_layer_input_gate_weight_palettized, x = input_115)[name = string("gated_25")]; + string gated_27_mode_0 = const()[name = string("gated_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_27 = gelu(mode = gated_27_mode_0, x = gated_25)[name = string("gated_27")]; + tensor var_2342 = const()[name = string("op_2342"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_9_axes_0 = const()[name = string("per_layer_slice_conv_9_axes_0"), val = tensor([2])]; + tensor var_2343_cast_fp16 = transpose(perm = var_2342, x = per_layer_slice_9_cast_fp16)[name = string("transpose_47")]; + tensor per_layer_slice_conv_9_cast_fp16 = expand_dims(axes = per_layer_slice_conv_9_axes_0, x = var_2343_cast_fp16)[name = string("per_layer_slice_conv_9_cast_fp16")]; + tensor input_117_cast_fp16 = mul(x = gated_27, y = per_layer_slice_conv_9_cast_fp16)[name = string("input_117_cast_fp16")]; + string gated_29_pad_type_0 = const()[name = string("gated_29_pad_type_0"), val = string("valid")]; + tensor gated_29_strides_0 = const()[name = string("gated_29_strides_0"), val = tensor([1, 1])]; + tensor gated_29_pad_0 = const()[name = string("gated_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_29_dilations_0 = const()[name = string("gated_29_dilations_0"), val = tensor([1, 1])]; + int32 
gated_29_groups_0 = const()[name = string("gated_29_groups_0"), val = int32(1)]; + tensor layers_4_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403127744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403455488))))[name = string("layers_4_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_29_cast_fp16 = conv(dilations = gated_29_dilations_0, groups = gated_29_groups_0, pad = gated_29_pad_0, pad_type = gated_29_pad_type_0, strides = gated_29_strides_0, weight = layers_4_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("gated_29_cast_fp16")]; + tensor var_2359_axes_0 = const()[name = string("op_2359_axes_0"), val = tensor([2])]; + tensor var_2359_cast_fp16 = squeeze(axes = var_2359_axes_0, x = gated_29_cast_fp16)[name = string("op_2359_cast_fp16")]; + tensor var_2363 = const()[name = string("op_2363"), val = tensor([0, 2, 1])]; + int32 var_2369 = const()[name = string("op_2369"), val = int32(-1)]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_77_cast_fp16 = transpose(perm = var_2363, x = var_2359_cast_fp16)[name = string("transpose_46")]; + tensor var_2371_cast_fp16 = mul(x = x_77_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2371_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_2369, interleave = input_119_interleave_0, values = (x_77_cast_fp16, var_2371_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_2366_to_fp16 = const()[name = string("op_2366_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = 
layer_norm(axes = normed_117_axes_0, epsilon = var_2366_to_fp16, x = input_119_cast_fp16)[name = string("normed_117_cast_fp16")]; + tensor var_2376_split_sizes_0 = const()[name = string("op_2376_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2376_axis_0 = const()[name = string("op_2376_axis_0"), val = int32(-1)]; + tensor var_2376_cast_fp16_0, tensor var_2376_cast_fp16_1 = split(axis = var_2376_axis_0, split_sizes = var_2376_split_sizes_0, x = normed_117_cast_fp16)[name = string("op_2376_cast_fp16")]; + tensor layers_4_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_4_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403458112)))]; + tensor hidden_states_49_cast_fp16 = mul(x = var_2376_cast_fp16_0, y = layers_4_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_49_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + tensor const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = tensor([0x1.c6p-1])]; + tensor x_79_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_39_promoted_to_fp16)[name = string("x_79_cast_fp16")]; + int32 var_2391 = const()[name = string("op_2391"), val = int32(-1)]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2393_cast_fp16 = mul(x = x_79_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_2393_cast_fp16")]; + bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; + tensor input_121_cast_fp16 = concat(axis = var_2391, interleave = input_121_interleave_0, values = (x_79_cast_fp16, var_2393_cast_fp16))[name = string("input_121_cast_fp16")]; + tensor normed_121_axes_0 = const()[name = 
string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_2388_to_fp16 = const()[name = string("op_2388_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_2388_to_fp16, x = input_121_cast_fp16)[name = string("normed_121_cast_fp16")]; + tensor var_2398_split_sizes_0 = const()[name = string("op_2398_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2398_axis_0 = const()[name = string("op_2398_axis_0"), val = int32(-1)]; + tensor var_2398_cast_fp16_0, tensor var_2398_cast_fp16_1 = split(axis = var_2398_axis_0, split_sizes = var_2398_split_sizes_0, x = normed_121_cast_fp16)[name = string("op_2398_cast_fp16")]; + tensor layers_5_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403463296)))]; + tensor h_31_cast_fp16 = mul(x = var_2398_cast_fp16_0, y = layers_5_input_layernorm_weight_promoted_to_fp16)[name = string("h_31_cast_fp16")]; + tensor var_2404 = const()[name = string("op_2404"), val = tensor([0, 2, 1])]; + tensor var_2407_axes_0 = const()[name = string("op_2407_axes_0"), val = tensor([2])]; + tensor var_2405_cast_fp16 = transpose(perm = var_2404, x = h_31_cast_fp16)[name = string("transpose_45")]; + tensor var_2407_cast_fp16 = expand_dims(axes = var_2407_axes_0, x = var_2405_cast_fp16)[name = string("op_2407_cast_fp16")]; + string q_51_pad_type_0 = const()[name = string("q_51_pad_type_0"), val = string("valid")]; + tensor q_51_strides_0 = const()[name = string("q_51_strides_0"), val = tensor([1, 1])]; + tensor q_51_pad_0 = const()[name = string("q_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_51_dilations_0 = const()[name = string("q_51_dilations_0"), val = tensor([1, 1])]; + int32 q_51_groups_0 = const()[name = string("q_51_groups_0"), val = int32(1)]; + tensor q_51 = conv(dilations = q_51_dilations_0, groups = q_51_groups_0, 
pad = q_51_pad_0, pad_type = q_51_pad_type_0, strides = q_51_strides_0, weight = layers_5_self_attn_q_proj_weight_palettized, x = var_2407_cast_fp16)[name = string("q_51")]; + tensor var_2428 = const()[name = string("op_2428"), val = tensor([1, 8, 256, 3])]; + tensor var_2429 = reshape(shape = var_2428, x = q_51)[name = string("op_2429")]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2452 = const()[name = string("op_2452"), val = tensor([3, 8, 256])]; + tensor transpose_46 = transpose(perm = transpose_46_perm_0, x = var_2429)[name = string("transpose_44")]; + tensor x_81 = reshape(shape = var_2452, x = transpose_46)[name = string("x_81")]; + int32 var_2458 = const()[name = string("op_2458"), val = int32(-1)]; + fp16 const_41_promoted = const()[name = string("const_41_promoted"), val = fp16(-0x1p+0)]; + tensor var_2460 = mul(x = x_81, y = const_41_promoted)[name = string("op_2460")]; + bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; + tensor input_125 = concat(axis = var_2458, interleave = input_125_interleave_0, values = (x_81, var_2460))[name = string("input_125")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_2455_to_fp16, x = input_125)[name = string("normed_125_cast_fp16")]; + tensor var_2465_split_sizes_0 = const()[name = string("op_2465_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2465_axis_0 = const()[name = string("op_2465_axis_0"), val = int32(-1)]; + tensor var_2465_0, tensor var_2465_1 = split(axis = var_2465_axis_0, split_sizes = var_2465_split_sizes_0, x = normed_125_cast_fp16)[name = string("op_2465")]; + tensor q_55 = mul(x = var_2465_0, y = layers_0_self_attn_q_norm_weight)[name = 
string("q_55")]; + tensor var_2472 = const()[name = string("op_2472"), val = tensor([1, 3, 8, 256])]; + tensor var_2473 = reshape(shape = var_2472, x = q_55)[name = string("op_2473")]; + tensor var_2478 = const()[name = string("op_2478"), val = tensor([0, 2, 1, 3])]; + tensor q_57 = transpose(perm = var_2478, x = var_2473)[name = string("transpose_43")]; + tensor var_2480_cast_fp16 = mul(x = q_57, y = cos_s)[name = string("op_2480_cast_fp16")]; + tensor var_2481_split_sizes_0 = const()[name = string("op_2481_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2481_axis_0 = const()[name = string("op_2481_axis_0"), val = int32(-1)]; + tensor var_2481_0, tensor var_2481_1 = split(axis = var_2481_axis_0, split_sizes = var_2481_split_sizes_0, x = q_57)[name = string("op_2481")]; + fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; + tensor var_2483 = mul(x = var_2481_1, y = const_42_promoted)[name = string("op_2483")]; + int32 var_2485 = const()[name = string("op_2485"), val = int32(-1)]; + bool var_2486_interleave_0 = const()[name = string("op_2486_interleave_0"), val = bool(false)]; + tensor var_2486 = concat(axis = var_2485, interleave = var_2486_interleave_0, values = (var_2483, var_2481_0))[name = string("op_2486")]; + tensor var_2487_cast_fp16 = mul(x = var_2486, y = sin_s)[name = string("op_2487_cast_fp16")]; + tensor q_59_cast_fp16 = add(x = var_2480_cast_fp16, y = var_2487_cast_fp16)[name = string("q_59_cast_fp16")]; + bool attn_weights_21_transpose_x_0 = const()[name = string("attn_weights_21_transpose_x_0"), val = bool(false)]; + bool attn_weights_21_transpose_y_0 = const()[name = string("attn_weights_21_transpose_y_0"), val = bool(false)]; + tensor attn_weights_21_cast_fp16 = matmul(transpose_x = attn_weights_21_transpose_x_0, transpose_y = attn_weights_21_transpose_y_0, x = q_59_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_83_cast_fp16 = add(x = 
attn_weights_21_cast_fp16, y = causal_mask_sliding)[name = string("x_83_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_83_cast_fp16)[name = string("reduce_max_5")]; + tensor var_2519 = sub(x = x_83_cast_fp16, y = reduce_max_5)[name = string("op_2519")]; + tensor var_2525 = exp(x = var_2519)[name = string("op_2525")]; + tensor var_2535_axes_0 = const()[name = string("op_2535_axes_0"), val = tensor([-1])]; + bool var_2535_keep_dims_0 = const()[name = string("op_2535_keep_dims_0"), val = bool(true)]; + tensor var_2535 = reduce_sum(axes = var_2535_axes_0, keep_dims = var_2535_keep_dims_0, x = var_2525)[name = string("op_2535")]; + tensor var_2541_cast_fp16 = real_div(x = var_2525, y = var_2535)[name = string("op_2541_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = var_2541_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_2552 = const()[name = string("op_2552"), val = tensor([0, 2, 1, 3])]; + tensor var_2559 = const()[name = string("op_2559"), val = tensor([1, 3, -1])]; + tensor var_2553_cast_fp16 = transpose(perm = var_2552, x = attn_output_31_cast_fp16)[name = string("transpose_42")]; + tensor attn_output_33_cast_fp16 = reshape(shape = var_2559, x = var_2553_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_2564 = const()[name = string("op_2564"), val = tensor([0, 2, 1])]; + string var_2580_pad_type_0 = 
const()[name = string("op_2580_pad_type_0"), val = string("valid")]; + int32 var_2580_groups_0 = const()[name = string("op_2580_groups_0"), val = int32(1)]; + tensor var_2580_strides_0 = const()[name = string("op_2580_strides_0"), val = tensor([1])]; + tensor var_2580_pad_0 = const()[name = string("op_2580_pad_0"), val = tensor([0, 0])]; + tensor var_2580_dilations_0 = const()[name = string("op_2580_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403468480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406089984))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2565_cast_fp16 = transpose(perm = var_2564, x = attn_output_33_cast_fp16)[name = string("transpose_41")]; + tensor var_2580_cast_fp16 = conv(dilations = var_2580_dilations_0, groups = var_2580_groups_0, pad = var_2580_pad_0, pad_type = var_2580_pad_type_0, strides = var_2580_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_2565_cast_fp16)[name = string("op_2580_cast_fp16")]; + tensor var_2584 = const()[name = string("op_2584"), val = tensor([0, 2, 1])]; + int32 var_2590 = const()[name = string("op_2590"), val = int32(-1)]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_87_cast_fp16 = transpose(perm = var_2584, x = var_2580_cast_fp16)[name = string("transpose_40")]; + tensor var_2592_cast_fp16 = mul(x = x_87_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2592_cast_fp16")]; + bool input_129_interleave_0 = const()[name = string("input_129_interleave_0"), val = bool(false)]; + tensor input_129_cast_fp16 = concat(axis = var_2590, interleave = input_129_interleave_0, values = (x_87_cast_fp16, var_2592_cast_fp16))[name = string("input_129_cast_fp16")]; + tensor 
normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_2587_to_fp16 = const()[name = string("op_2587_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_2587_to_fp16, x = input_129_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor var_2597_split_sizes_0 = const()[name = string("op_2597_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2597_axis_0 = const()[name = string("op_2597_axis_0"), val = int32(-1)]; + tensor var_2597_cast_fp16_0, tensor var_2597_cast_fp16_1 = split(axis = var_2597_axis_0, split_sizes = var_2597_split_sizes_0, x = normed_129_cast_fp16)[name = string("op_2597_cast_fp16")]; + tensor layers_5_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406092608)))]; + tensor attn_output_35_cast_fp16 = mul(x = var_2597_cast_fp16_0, y = layers_5_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor x_89_cast_fp16 = add(x = x_79_cast_fp16, y = attn_output_35_cast_fp16)[name = string("x_89_cast_fp16")]; + int32 var_2606 = const()[name = string("op_2606"), val = int32(-1)]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2608_cast_fp16 = mul(x = x_89_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2608_cast_fp16")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131_cast_fp16 = concat(axis = var_2606, interleave = input_131_interleave_0, values = (x_89_cast_fp16, var_2608_cast_fp16))[name = string("input_131_cast_fp16")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_2603_to_fp16 = const()[name = 
string("op_2603_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_2603_to_fp16, x = input_131_cast_fp16)[name = string("normed_133_cast_fp16")]; + tensor var_2613_split_sizes_0 = const()[name = string("op_2613_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2613_axis_0 = const()[name = string("op_2613_axis_0"), val = int32(-1)]; + tensor var_2613_cast_fp16_0, tensor var_2613_cast_fp16_1 = split(axis = var_2613_axis_0, split_sizes = var_2613_split_sizes_0, x = normed_133_cast_fp16)[name = string("op_2613_cast_fp16")]; + tensor layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406097792)))]; + tensor h_33_cast_fp16 = mul(x = var_2613_cast_fp16_0, y = layers_5_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_33_cast_fp16")]; + tensor var_2624 = const()[name = string("op_2624"), val = tensor([0, 2, 1])]; + tensor input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor([2])]; + tensor var_2625 = transpose(perm = var_2624, x = h_33_cast_fp16)[name = string("transpose_39")]; + tensor input_133 = expand_dims(axes = input_133_axes_0, x = var_2625)[name = string("input_133")]; + string gate_21_pad_type_0 = const()[name = string("gate_21_pad_type_0"), val = string("valid")]; + tensor gate_21_strides_0 = const()[name = string("gate_21_strides_0"), val = tensor([1, 1])]; + tensor gate_21_pad_0 = const()[name = string("gate_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_21_dilations_0 = const()[name = string("gate_21_dilations_0"), val = tensor([1, 1])]; + int32 gate_21_groups_0 = const()[name = string("gate_21_groups_0"), val = int32(1)]; + tensor gate_21 = conv(dilations = gate_21_dilations_0, groups = gate_21_groups_0, pad = gate_21_pad_0, pad_type = gate_21_pad_type_0, 
strides = gate_21_strides_0, weight = layers_5_mlp_gate_proj_weight_palettized, x = input_133)[name = string("gate_21")]; + string up_11_pad_type_0 = const()[name = string("up_11_pad_type_0"), val = string("valid")]; + tensor up_11_strides_0 = const()[name = string("up_11_strides_0"), val = tensor([1, 1])]; + tensor up_11_pad_0 = const()[name = string("up_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_11_dilations_0 = const()[name = string("up_11_dilations_0"), val = tensor([1, 1])]; + int32 up_11_groups_0 = const()[name = string("up_11_groups_0"), val = int32(1)]; + tensor up_11 = conv(dilations = up_11_dilations_0, groups = up_11_groups_0, pad = up_11_pad_0, pad_type = up_11_pad_type_0, strides = up_11_strides_0, weight = layers_5_mlp_up_proj_weight_palettized, x = input_133)[name = string("up_11")]; + string gate_23_mode_0 = const()[name = string("gate_23_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_23 = gelu(mode = gate_23_mode_0, x = gate_21)[name = string("gate_23")]; + tensor input_135 = mul(x = gate_23, y = up_11)[name = string("input_135")]; + string mlp_out_11_pad_type_0 = const()[name = string("mlp_out_11_pad_type_0"), val = string("valid")]; + tensor mlp_out_11_strides_0 = const()[name = string("mlp_out_11_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_11_pad_0 = const()[name = string("mlp_out_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_11_dilations_0 = const()[name = string("mlp_out_11_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_11_groups_0 = const()[name = string("mlp_out_11_groups_0"), val = int32(1)]; + tensor mlp_out_11 = conv(dilations = mlp_out_11_dilations_0, groups = mlp_out_11_groups_0, pad = mlp_out_11_pad_0, pad_type = mlp_out_11_pad_type_0, strides = mlp_out_11_strides_0, weight = layers_5_mlp_down_proj_weight_palettized, x = input_135)[name = string("mlp_out_11")]; + tensor var_2665_axes_0 = const()[name = string("op_2665_axes_0"), val = tensor([2])]; + tensor var_2665 = squeeze(axes = 
var_2665_axes_0, x = mlp_out_11)[name = string("op_2665")]; + tensor var_2669 = const()[name = string("op_2669"), val = tensor([0, 2, 1])]; + int32 var_2675 = const()[name = string("op_2675"), val = int32(-1)]; + fp16 const_45_promoted = const()[name = string("const_45_promoted"), val = fp16(-0x1p+0)]; + tensor x_91 = transpose(perm = var_2669, x = var_2665)[name = string("transpose_38")]; + tensor var_2677 = mul(x = x_91, y = const_45_promoted)[name = string("op_2677")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137 = concat(axis = var_2675, interleave = input_137_interleave_0, values = (x_91, var_2677))[name = string("input_137")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_2672_to_fp16 = const()[name = string("op_2672_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_2672_to_fp16, x = input_137)[name = string("normed_137_cast_fp16")]; + tensor var_2682_split_sizes_0 = const()[name = string("op_2682_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2682_axis_0 = const()[name = string("op_2682_axis_0"), val = int32(-1)]; + tensor var_2682_0, tensor var_2682_1 = split(axis = var_2682_axis_0, split_sizes = var_2682_split_sizes_0, x = normed_137_cast_fp16)[name = string("op_2682")]; + tensor hidden_states_53 = mul(x = var_2682_0, y = layers_5_post_feedforward_layernorm_weight)[name = string("hidden_states_53")]; + tensor hidden_states_55_cast_fp16 = add(x = x_89_cast_fp16, y = hidden_states_53)[name = string("hidden_states_55_cast_fp16")]; + tensor per_layer_slice_11_begin_0 = const()[name = string("per_layer_slice_11_begin_0"), val = tensor([0, 0, 9728])]; + tensor per_layer_slice_11_end_0 = const()[name = string("per_layer_slice_11_end_0"), val = tensor([1, 3, 9984])]; + tensor per_layer_slice_11_end_mask_0 = const()[name = 
string("per_layer_slice_11_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_11_cast_fp16 = slice_by_index(begin = per_layer_slice_11_begin_0, end = per_layer_slice_11_end_0, end_mask = per_layer_slice_11_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_11_cast_fp16")]; + tensor var_2710 = const()[name = string("op_2710"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_2711 = transpose(perm = var_2710, x = hidden_states_55_cast_fp16)[name = string("transpose_37")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_2711)[name = string("input_139")]; + string gated_31_pad_type_0 = const()[name = string("gated_31_pad_type_0"), val = string("valid")]; + tensor gated_31_strides_0 = const()[name = string("gated_31_strides_0"), val = tensor([1, 1])]; + tensor gated_31_pad_0 = const()[name = string("gated_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_31_dilations_0 = const()[name = string("gated_31_dilations_0"), val = tensor([1, 1])]; + int32 gated_31_groups_0 = const()[name = string("gated_31_groups_0"), val = int32(1)]; + tensor gated_31 = conv(dilations = gated_31_dilations_0, groups = gated_31_groups_0, pad = gated_31_pad_0, pad_type = gated_31_pad_type_0, strides = gated_31_strides_0, weight = layers_5_per_layer_input_gate_weight_palettized, x = input_139)[name = string("gated_31")]; + string gated_33_mode_0 = const()[name = string("gated_33_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_33 = gelu(mode = gated_33_mode_0, x = gated_31)[name = string("gated_33")]; + tensor var_2730 = const()[name = string("op_2730"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_11_axes_0 = const()[name = string("per_layer_slice_conv_11_axes_0"), val = tensor([2])]; + tensor var_2731_cast_fp16 = transpose(perm = var_2730, x = per_layer_slice_11_cast_fp16)[name = string("transpose_36")]; + tensor 
per_layer_slice_conv_11_cast_fp16 = expand_dims(axes = per_layer_slice_conv_11_axes_0, x = var_2731_cast_fp16)[name = string("per_layer_slice_conv_11_cast_fp16")]; + tensor input_141_cast_fp16 = mul(x = gated_33, y = per_layer_slice_conv_11_cast_fp16)[name = string("input_141_cast_fp16")]; + string gated_35_pad_type_0 = const()[name = string("gated_35_pad_type_0"), val = string("valid")]; + tensor gated_35_strides_0 = const()[name = string("gated_35_strides_0"), val = tensor([1, 1])]; + tensor gated_35_pad_0 = const()[name = string("gated_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_35_dilations_0 = const()[name = string("gated_35_dilations_0"), val = tensor([1, 1])]; + int32 gated_35_groups_0 = const()[name = string("gated_35_groups_0"), val = int32(1)]; + tensor layers_5_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406102976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406430720))))[name = string("layers_5_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_35_cast_fp16 = conv(dilations = gated_35_dilations_0, groups = gated_35_groups_0, pad = gated_35_pad_0, pad_type = gated_35_pad_type_0, strides = gated_35_strides_0, weight = layers_5_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_141_cast_fp16)[name = string("gated_35_cast_fp16")]; + tensor var_2747_axes_0 = const()[name = string("op_2747_axes_0"), val = tensor([2])]; + tensor var_2747_cast_fp16 = squeeze(axes = var_2747_axes_0, x = gated_35_cast_fp16)[name = string("op_2747_cast_fp16")]; + tensor var_2751 = const()[name = string("op_2751"), val = tensor([0, 2, 1])]; + int32 var_2757 = const()[name = string("op_2757"), val = int32(-1)]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_93_cast_fp16 = transpose(perm = 
var_2751, x = var_2747_cast_fp16)[name = string("transpose_35")]; + tensor var_2759_cast_fp16 = mul(x = x_93_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2759_cast_fp16")]; + bool input_143_interleave_0 = const()[name = string("input_143_interleave_0"), val = bool(false)]; + tensor input_143_cast_fp16 = concat(axis = var_2757, interleave = input_143_interleave_0, values = (x_93_cast_fp16, var_2759_cast_fp16))[name = string("input_143_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_2754_to_fp16 = const()[name = string("op_2754_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_2754_to_fp16, x = input_143_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor var_2764_split_sizes_0 = const()[name = string("op_2764_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2764_axis_0 = const()[name = string("op_2764_axis_0"), val = int32(-1)]; + tensor var_2764_cast_fp16_0, tensor var_2764_cast_fp16_1 = split(axis = var_2764_axis_0, split_sizes = var_2764_split_sizes_0, x = normed_141_cast_fp16)[name = string("op_2764_cast_fp16")]; + tensor layers_5_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_5_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406433344)))]; + tensor hidden_states_59_cast_fp16 = mul(x = var_2764_cast_fp16_0, y = layers_5_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_59_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = tensor([0x1.c4p-1])]; + tensor x_95_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = 
const_47_promoted_to_fp16)[name = string("x_95_cast_fp16")]; + int32 var_2779 = const()[name = string("op_2779"), val = int32(-1)]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2781_cast_fp16 = mul(x = x_95_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_2781_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_2779, interleave = input_145_interleave_0, values = (x_95_cast_fp16, var_2781_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_2776_to_fp16 = const()[name = string("op_2776_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_2776_to_fp16, x = input_145_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor var_2786_split_sizes_0 = const()[name = string("op_2786_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2786_axis_0 = const()[name = string("op_2786_axis_0"), val = int32(-1)]; + tensor var_2786_cast_fp16_0, tensor var_2786_cast_fp16_1 = split(axis = var_2786_axis_0, split_sizes = var_2786_split_sizes_0, x = normed_145_cast_fp16)[name = string("op_2786_cast_fp16")]; + tensor layers_6_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406438528)))]; + tensor h_37_cast_fp16 = mul(x = var_2786_cast_fp16_0, y = layers_6_input_layernorm_weight_promoted_to_fp16)[name = string("h_37_cast_fp16")]; + tensor var_2792 = const()[name = string("op_2792"), val = tensor([0, 2, 1])]; + tensor var_2795_axes_0 = const()[name = string("op_2795_axes_0"), val = tensor([2])]; + tensor var_2793_cast_fp16 = transpose(perm = var_2792, x = 
h_37_cast_fp16)[name = string("transpose_34")]; + tensor var_2795_cast_fp16 = expand_dims(axes = var_2795_axes_0, x = var_2793_cast_fp16)[name = string("op_2795_cast_fp16")]; + string q_61_pad_type_0 = const()[name = string("q_61_pad_type_0"), val = string("valid")]; + tensor q_61_strides_0 = const()[name = string("q_61_strides_0"), val = tensor([1, 1])]; + tensor q_61_pad_0 = const()[name = string("q_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_61_dilations_0 = const()[name = string("q_61_dilations_0"), val = tensor([1, 1])]; + int32 q_61_groups_0 = const()[name = string("q_61_groups_0"), val = int32(1)]; + tensor q_61 = conv(dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = layers_6_self_attn_q_proj_weight_palettized, x = var_2795_cast_fp16)[name = string("q_61")]; + tensor var_2816 = const()[name = string("op_2816"), val = tensor([1, 8, 256, 3])]; + tensor var_2817 = reshape(shape = var_2816, x = q_61)[name = string("op_2817")]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_2840 = const()[name = string("op_2840"), val = tensor([3, 8, 256])]; + tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = var_2817)[name = string("transpose_33")]; + tensor x_97 = reshape(shape = var_2840, x = transpose_48)[name = string("x_97")]; + int32 var_2846 = const()[name = string("op_2846"), val = int32(-1)]; + fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; + tensor var_2848 = mul(x = x_97, y = const_49_promoted)[name = string("op_2848")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_2846, interleave = input_149_interleave_0, values = (x_97, var_2848))[name = string("input_149")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; 
+ fp16 var_2843_to_fp16 = const()[name = string("op_2843_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_2843_to_fp16, x = input_149)[name = string("normed_149_cast_fp16")]; + tensor var_2853_split_sizes_0 = const()[name = string("op_2853_split_sizes_0"), val = tensor([256, 256])]; + int32 var_2853_axis_0 = const()[name = string("op_2853_axis_0"), val = int32(-1)]; + tensor var_2853_0, tensor var_2853_1 = split(axis = var_2853_axis_0, split_sizes = var_2853_split_sizes_0, x = normed_149_cast_fp16)[name = string("op_2853")]; + tensor q_65 = mul(x = var_2853_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_65")]; + tensor var_2860 = const()[name = string("op_2860"), val = tensor([1, 3, 8, 256])]; + tensor var_2861 = reshape(shape = var_2860, x = q_65)[name = string("op_2861")]; + tensor var_2866 = const()[name = string("op_2866"), val = tensor([0, 2, 1, 3])]; + tensor q_67 = transpose(perm = var_2866, x = var_2861)[name = string("transpose_32")]; + tensor var_2868_cast_fp16 = mul(x = q_67, y = cos_s)[name = string("op_2868_cast_fp16")]; + tensor var_2869_split_sizes_0 = const()[name = string("op_2869_split_sizes_0"), val = tensor([128, 128])]; + int32 var_2869_axis_0 = const()[name = string("op_2869_axis_0"), val = int32(-1)]; + tensor var_2869_0, tensor var_2869_1 = split(axis = var_2869_axis_0, split_sizes = var_2869_split_sizes_0, x = q_67)[name = string("op_2869")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_2871 = mul(x = var_2869_1, y = const_50_promoted)[name = string("op_2871")]; + int32 var_2873 = const()[name = string("op_2873"), val = int32(-1)]; + bool var_2874_interleave_0 = const()[name = string("op_2874_interleave_0"), val = bool(false)]; + tensor var_2874 = concat(axis = var_2873, interleave = var_2874_interleave_0, values = (var_2871, var_2869_0))[name = string("op_2874")]; + tensor var_2875_cast_fp16 = mul(x = 
var_2874, y = sin_s)[name = string("op_2875_cast_fp16")]; + tensor q_69_cast_fp16 = add(x = var_2868_cast_fp16, y = var_2875_cast_fp16)[name = string("q_69_cast_fp16")]; + bool attn_weights_25_transpose_x_0 = const()[name = string("attn_weights_25_transpose_x_0"), val = bool(false)]; + bool attn_weights_25_transpose_y_0 = const()[name = string("attn_weights_25_transpose_y_0"), val = bool(false)]; + tensor attn_weights_25_cast_fp16 = matmul(transpose_x = attn_weights_25_transpose_x_0, transpose_y = attn_weights_25_transpose_y_0, x = q_69_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask_sliding)[name = string("x_99_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_99_cast_fp16)[name = string("reduce_max_6")]; + tensor var_2907 = sub(x = x_99_cast_fp16, y = reduce_max_6)[name = string("op_2907")]; + tensor var_2913 = exp(x = var_2907)[name = string("op_2913")]; + tensor var_2923_axes_0 = const()[name = string("op_2923_axes_0"), val = tensor([-1])]; + bool var_2923_keep_dims_0 = const()[name = string("op_2923_keep_dims_0"), val = bool(true)]; + tensor var_2923 = reduce_sum(axes = var_2923_axes_0, keep_dims = var_2923_keep_dims_0, x = var_2913)[name = string("op_2923")]; + tensor var_2929_cast_fp16 = real_div(x = var_2913, y = var_2923)[name = string("op_2929_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y 
= attn_output_37_transpose_y_0, x = var_2929_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_2940 = const()[name = string("op_2940"), val = tensor([0, 2, 1, 3])]; + tensor var_2947 = const()[name = string("op_2947"), val = tensor([1, 3, -1])]; + tensor var_2941_cast_fp16 = transpose(perm = var_2940, x = attn_output_37_cast_fp16)[name = string("transpose_31")]; + tensor attn_output_39_cast_fp16 = reshape(shape = var_2947, x = var_2941_cast_fp16)[name = string("attn_output_39_cast_fp16")]; + tensor var_2952 = const()[name = string("op_2952"), val = tensor([0, 2, 1])]; + string var_2968_pad_type_0 = const()[name = string("op_2968_pad_type_0"), val = string("valid")]; + int32 var_2968_groups_0 = const()[name = string("op_2968_groups_0"), val = int32(1)]; + tensor var_2968_strides_0 = const()[name = string("op_2968_strides_0"), val = tensor([1])]; + tensor var_2968_pad_0 = const()[name = string("op_2968_pad_0"), val = tensor([0, 0])]; + tensor var_2968_dilations_0 = const()[name = string("op_2968_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406443712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409065216))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2953_cast_fp16 = transpose(perm = var_2952, x = attn_output_39_cast_fp16)[name = string("transpose_30")]; + tensor var_2968_cast_fp16 = conv(dilations = var_2968_dilations_0, groups = var_2968_groups_0, pad = var_2968_pad_0, pad_type = var_2968_pad_type_0, strides = var_2968_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_2953_cast_fp16)[name = string("op_2968_cast_fp16")]; + tensor var_2972 = const()[name = string("op_2972"), val = tensor([0, 2, 1])]; + int32 var_2978 = const()[name = string("op_2978"), val = 
int32(-1)]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_103_cast_fp16 = transpose(perm = var_2972, x = var_2968_cast_fp16)[name = string("transpose_29")]; + tensor var_2980_cast_fp16 = mul(x = x_103_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_2980_cast_fp16")]; + bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; + tensor input_153_cast_fp16 = concat(axis = var_2978, interleave = input_153_interleave_0, values = (x_103_cast_fp16, var_2980_cast_fp16))[name = string("input_153_cast_fp16")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_2975_to_fp16 = const()[name = string("op_2975_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_2975_to_fp16, x = input_153_cast_fp16)[name = string("normed_153_cast_fp16")]; + tensor var_2985_split_sizes_0 = const()[name = string("op_2985_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_2985_axis_0 = const()[name = string("op_2985_axis_0"), val = int32(-1)]; + tensor var_2985_cast_fp16_0, tensor var_2985_cast_fp16_1 = split(axis = var_2985_axis_0, split_sizes = var_2985_split_sizes_0, x = normed_153_cast_fp16)[name = string("op_2985_cast_fp16")]; + tensor layers_6_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409067840)))]; + tensor attn_output_41_cast_fp16 = mul(x = var_2985_cast_fp16_0, y = layers_6_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_95_cast_fp16, y = attn_output_41_cast_fp16)[name = string("x_105_cast_fp16")]; + int32 var_2994 = const()[name = string("op_2994"), val = int32(-1)]; + fp16 
const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2996_cast_fp16 = mul(x = x_105_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2996_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_2994, interleave = input_155_interleave_0, values = (x_105_cast_fp16, var_2996_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_2991_to_fp16 = const()[name = string("op_2991_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_2991_to_fp16, x = input_155_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor var_3001_split_sizes_0 = const()[name = string("op_3001_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3001_axis_0 = const()[name = string("op_3001_axis_0"), val = int32(-1)]; + tensor var_3001_cast_fp16_0, tensor var_3001_cast_fp16_1 = split(axis = var_3001_axis_0, split_sizes = var_3001_split_sizes_0, x = normed_157_cast_fp16)[name = string("op_3001_cast_fp16")]; + tensor layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409073024)))]; + tensor h_39_cast_fp16 = mul(x = var_3001_cast_fp16_0, y = layers_6_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_39_cast_fp16")]; + tensor var_3012 = const()[name = string("op_3012"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_3013 = transpose(perm = var_3012, x = h_39_cast_fp16)[name = string("transpose_28")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_3013)[name 
= string("input_157")]; + string gate_25_pad_type_0 = const()[name = string("gate_25_pad_type_0"), val = string("valid")]; + tensor gate_25_strides_0 = const()[name = string("gate_25_strides_0"), val = tensor([1, 1])]; + tensor gate_25_pad_0 = const()[name = string("gate_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_25_dilations_0 = const()[name = string("gate_25_dilations_0"), val = tensor([1, 1])]; + int32 gate_25_groups_0 = const()[name = string("gate_25_groups_0"), val = int32(1)]; + tensor gate_25 = conv(dilations = gate_25_dilations_0, groups = gate_25_groups_0, pad = gate_25_pad_0, pad_type = gate_25_pad_type_0, strides = gate_25_strides_0, weight = layers_6_mlp_gate_proj_weight_palettized, x = input_157)[name = string("gate_25")]; + string up_13_pad_type_0 = const()[name = string("up_13_pad_type_0"), val = string("valid")]; + tensor up_13_strides_0 = const()[name = string("up_13_strides_0"), val = tensor([1, 1])]; + tensor up_13_pad_0 = const()[name = string("up_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_13_dilations_0 = const()[name = string("up_13_dilations_0"), val = tensor([1, 1])]; + int32 up_13_groups_0 = const()[name = string("up_13_groups_0"), val = int32(1)]; + tensor up_13 = conv(dilations = up_13_dilations_0, groups = up_13_groups_0, pad = up_13_pad_0, pad_type = up_13_pad_type_0, strides = up_13_strides_0, weight = layers_6_mlp_up_proj_weight_palettized, x = input_157)[name = string("up_13")]; + string gate_27_mode_0 = const()[name = string("gate_27_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_27 = gelu(mode = gate_27_mode_0, x = gate_25)[name = string("gate_27")]; + tensor input_159 = mul(x = gate_27, y = up_13)[name = string("input_159")]; + string mlp_out_13_pad_type_0 = const()[name = string("mlp_out_13_pad_type_0"), val = string("valid")]; + tensor mlp_out_13_strides_0 = const()[name = string("mlp_out_13_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_13_pad_0 = const()[name = 
string("mlp_out_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_13_dilations_0 = const()[name = string("mlp_out_13_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_13_groups_0 = const()[name = string("mlp_out_13_groups_0"), val = int32(1)]; + tensor mlp_out_13 = conv(dilations = mlp_out_13_dilations_0, groups = mlp_out_13_groups_0, pad = mlp_out_13_pad_0, pad_type = mlp_out_13_pad_type_0, strides = mlp_out_13_strides_0, weight = layers_6_mlp_down_proj_weight_palettized, x = input_159)[name = string("mlp_out_13")]; + tensor var_3053_axes_0 = const()[name = string("op_3053_axes_0"), val = tensor([2])]; + tensor var_3053 = squeeze(axes = var_3053_axes_0, x = mlp_out_13)[name = string("op_3053")]; + tensor var_3057 = const()[name = string("op_3057"), val = tensor([0, 2, 1])]; + int32 var_3063 = const()[name = string("op_3063"), val = int32(-1)]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor x_107 = transpose(perm = var_3057, x = var_3053)[name = string("transpose_27")]; + tensor var_3065 = mul(x = x_107, y = const_53_promoted)[name = string("op_3065")]; + bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; + tensor input_161 = concat(axis = var_3063, interleave = input_161_interleave_0, values = (x_107, var_3065))[name = string("input_161")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_3060_to_fp16 = const()[name = string("op_3060_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_3060_to_fp16, x = input_161)[name = string("normed_161_cast_fp16")]; + tensor var_3070_split_sizes_0 = const()[name = string("op_3070_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3070_axis_0 = const()[name = string("op_3070_axis_0"), val = int32(-1)]; + tensor var_3070_0, tensor var_3070_1 = split(axis = var_3070_axis_0, split_sizes = 
var_3070_split_sizes_0, x = normed_161_cast_fp16)[name = string("op_3070")]; + tensor hidden_states_63 = mul(x = var_3070_0, y = layers_6_post_feedforward_layernorm_weight)[name = string("hidden_states_63")]; + tensor hidden_states_65_cast_fp16 = add(x = x_105_cast_fp16, y = hidden_states_63)[name = string("hidden_states_65_cast_fp16")]; + tensor per_layer_slice_13_begin_0 = const()[name = string("per_layer_slice_13_begin_0"), val = tensor([0, 0, 9984])]; + tensor per_layer_slice_13_end_0 = const()[name = string("per_layer_slice_13_end_0"), val = tensor([1, 3, 10240])]; + tensor per_layer_slice_13_end_mask_0 = const()[name = string("per_layer_slice_13_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_13_cast_fp16 = slice_by_index(begin = per_layer_slice_13_begin_0, end = per_layer_slice_13_end_0, end_mask = per_layer_slice_13_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_13_cast_fp16")]; + tensor var_3098 = const()[name = string("op_3098"), val = tensor([0, 2, 1])]; + tensor input_163_axes_0 = const()[name = string("input_163_axes_0"), val = tensor([2])]; + tensor var_3099 = transpose(perm = var_3098, x = hidden_states_65_cast_fp16)[name = string("transpose_26")]; + tensor input_163 = expand_dims(axes = input_163_axes_0, x = var_3099)[name = string("input_163")]; + string gated_37_pad_type_0 = const()[name = string("gated_37_pad_type_0"), val = string("valid")]; + tensor gated_37_strides_0 = const()[name = string("gated_37_strides_0"), val = tensor([1, 1])]; + tensor gated_37_pad_0 = const()[name = string("gated_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_37_dilations_0 = const()[name = string("gated_37_dilations_0"), val = tensor([1, 1])]; + int32 gated_37_groups_0 = const()[name = string("gated_37_groups_0"), val = int32(1)]; + tensor gated_37 = conv(dilations = gated_37_dilations_0, groups = gated_37_groups_0, pad = gated_37_pad_0, pad_type = gated_37_pad_type_0, strides = gated_37_strides_0, weight = 
layers_6_per_layer_input_gate_weight_palettized, x = input_163)[name = string("gated_37")]; + string gated_39_mode_0 = const()[name = string("gated_39_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_39 = gelu(mode = gated_39_mode_0, x = gated_37)[name = string("gated_39")]; + tensor var_3118 = const()[name = string("op_3118"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_13_axes_0 = const()[name = string("per_layer_slice_conv_13_axes_0"), val = tensor([2])]; + tensor var_3119_cast_fp16 = transpose(perm = var_3118, x = per_layer_slice_13_cast_fp16)[name = string("transpose_25")]; + tensor per_layer_slice_conv_13_cast_fp16 = expand_dims(axes = per_layer_slice_conv_13_axes_0, x = var_3119_cast_fp16)[name = string("per_layer_slice_conv_13_cast_fp16")]; + tensor input_165_cast_fp16 = mul(x = gated_39, y = per_layer_slice_conv_13_cast_fp16)[name = string("input_165_cast_fp16")]; + string gated_41_pad_type_0 = const()[name = string("gated_41_pad_type_0"), val = string("valid")]; + tensor gated_41_strides_0 = const()[name = string("gated_41_strides_0"), val = tensor([1, 1])]; + tensor gated_41_pad_0 = const()[name = string("gated_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_41_dilations_0 = const()[name = string("gated_41_dilations_0"), val = tensor([1, 1])]; + int32 gated_41_groups_0 = const()[name = string("gated_41_groups_0"), val = int32(1)]; + tensor layers_6_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409078208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409405952))))[name = string("layers_6_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_41_cast_fp16 = conv(dilations = gated_41_dilations_0, groups = gated_41_groups_0, pad = gated_41_pad_0, pad_type = gated_41_pad_type_0, strides = gated_41_strides_0, weight = 
layers_6_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_165_cast_fp16)[name = string("gated_41_cast_fp16")]; + tensor var_3135_axes_0 = const()[name = string("op_3135_axes_0"), val = tensor([2])]; + tensor var_3135_cast_fp16 = squeeze(axes = var_3135_axes_0, x = gated_41_cast_fp16)[name = string("op_3135_cast_fp16")]; + tensor var_3139 = const()[name = string("op_3139"), val = tensor([0, 2, 1])]; + int32 var_3145 = const()[name = string("op_3145"), val = int32(-1)]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_109_cast_fp16 = transpose(perm = var_3139, x = var_3135_cast_fp16)[name = string("transpose_24")]; + tensor var_3147_cast_fp16 = mul(x = x_109_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_3147_cast_fp16")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167_cast_fp16 = concat(axis = var_3145, interleave = input_167_interleave_0, values = (x_109_cast_fp16, var_3147_cast_fp16))[name = string("input_167_cast_fp16")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_3142_to_fp16 = const()[name = string("op_3142_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_3142_to_fp16, x = input_167_cast_fp16)[name = string("normed_165_cast_fp16")]; + tensor var_3152_split_sizes_0 = const()[name = string("op_3152_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3152_axis_0 = const()[name = string("op_3152_axis_0"), val = int32(-1)]; + tensor var_3152_cast_fp16_0, tensor var_3152_cast_fp16_1 = split(axis = var_3152_axis_0, split_sizes = var_3152_split_sizes_0, x = normed_165_cast_fp16)[name = string("op_3152_cast_fp16")]; + tensor layers_6_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = 
string("layers_6_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409408576)))]; + tensor hidden_states_69_cast_fp16 = mul(x = var_3152_cast_fp16_0, y = layers_6_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_69_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = hidden_states_69_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = tensor([0x1.b6p-1])]; + tensor x_111_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_55_promoted_to_fp16)[name = string("x_111_cast_fp16")]; + int32 var_3167 = const()[name = string("op_3167"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3169_cast_fp16 = mul(x = x_111_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3169_cast_fp16")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169_cast_fp16 = concat(axis = var_3167, interleave = input_169_interleave_0, values = (x_111_cast_fp16, var_3169_cast_fp16))[name = string("input_169_cast_fp16")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_3164_to_fp16, x = input_169_cast_fp16)[name = string("normed_169_cast_fp16")]; + tensor var_3174_split_sizes_0 = const()[name = string("op_3174_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3174_axis_0 = const()[name = string("op_3174_axis_0"), val = int32(-1)]; + tensor var_3174_cast_fp16_0, tensor var_3174_cast_fp16_1 = split(axis = var_3174_axis_0, split_sizes = 
var_3174_split_sizes_0, x = normed_169_cast_fp16)[name = string("op_3174_cast_fp16")]; + tensor layers_7_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409413760)))]; + tensor h_43_cast_fp16 = mul(x = var_3174_cast_fp16_0, y = layers_7_input_layernorm_weight_promoted_to_fp16)[name = string("h_43_cast_fp16")]; + tensor var_3180 = const()[name = string("op_3180"), val = tensor([0, 2, 1])]; + tensor var_3183_axes_0 = const()[name = string("op_3183_axes_0"), val = tensor([2])]; + tensor var_3181_cast_fp16 = transpose(perm = var_3180, x = h_43_cast_fp16)[name = string("transpose_23")]; + tensor var_3183_cast_fp16 = expand_dims(axes = var_3183_axes_0, x = var_3181_cast_fp16)[name = string("op_3183_cast_fp16")]; + string q_71_pad_type_0 = const()[name = string("q_71_pad_type_0"), val = string("valid")]; + tensor q_71_strides_0 = const()[name = string("q_71_strides_0"), val = tensor([1, 1])]; + tensor q_71_pad_0 = const()[name = string("q_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_71_dilations_0 = const()[name = string("q_71_dilations_0"), val = tensor([1, 1])]; + int32 q_71_groups_0 = const()[name = string("q_71_groups_0"), val = int32(1)]; + tensor q_71 = conv(dilations = q_71_dilations_0, groups = q_71_groups_0, pad = q_71_pad_0, pad_type = q_71_pad_type_0, strides = q_71_strides_0, weight = layers_7_self_attn_q_proj_weight_palettized, x = var_3183_cast_fp16)[name = string("q_71")]; + tensor var_3204 = const()[name = string("op_3204"), val = tensor([1, 8, 256, 3])]; + tensor var_3205 = reshape(shape = var_3204, x = q_71)[name = string("op_3205")]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3228 = const()[name = string("op_3228"), val = tensor([3, 8, 256])]; + tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = 
var_3205)[name = string("transpose_22")]; + tensor x_113 = reshape(shape = var_3228, x = transpose_50)[name = string("x_113")]; + int32 var_3234 = const()[name = string("op_3234"), val = int32(-1)]; + fp16 const_57_promoted = const()[name = string("const_57_promoted"), val = fp16(-0x1p+0)]; + tensor var_3236 = mul(x = x_113, y = const_57_promoted)[name = string("op_3236")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173 = concat(axis = var_3234, interleave = input_173_interleave_0, values = (x_113, var_3236))[name = string("input_173")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_3231_to_fp16 = const()[name = string("op_3231_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_3231_to_fp16, x = input_173)[name = string("normed_173_cast_fp16")]; + tensor var_3241_split_sizes_0 = const()[name = string("op_3241_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3241_axis_0 = const()[name = string("op_3241_axis_0"), val = int32(-1)]; + tensor var_3241_0, tensor var_3241_1 = split(axis = var_3241_axis_0, split_sizes = var_3241_split_sizes_0, x = normed_173_cast_fp16)[name = string("op_3241")]; + tensor q_75 = mul(x = var_3241_0, y = layers_0_self_attn_q_norm_weight)[name = string("q_75")]; + tensor var_3248 = const()[name = string("op_3248"), val = tensor([1, 3, 8, 256])]; + tensor var_3249 = reshape(shape = var_3248, x = q_75)[name = string("op_3249")]; + tensor var_3254 = const()[name = string("op_3254"), val = tensor([0, 2, 1, 3])]; + tensor q_77 = transpose(perm = var_3254, x = var_3249)[name = string("transpose_21")]; + tensor var_3256_cast_fp16 = mul(x = q_77, y = cos_s)[name = string("op_3256_cast_fp16")]; + tensor var_3257_split_sizes_0 = const()[name = string("op_3257_split_sizes_0"), val = tensor([128, 128])]; + int32 var_3257_axis_0 = const()[name = 
string("op_3257_axis_0"), val = int32(-1)]; + tensor var_3257_0, tensor var_3257_1 = split(axis = var_3257_axis_0, split_sizes = var_3257_split_sizes_0, x = q_77)[name = string("op_3257")]; + fp16 const_58_promoted = const()[name = string("const_58_promoted"), val = fp16(-0x1p+0)]; + tensor var_3259 = mul(x = var_3257_1, y = const_58_promoted)[name = string("op_3259")]; + int32 var_3261 = const()[name = string("op_3261"), val = int32(-1)]; + bool var_3262_interleave_0 = const()[name = string("op_3262_interleave_0"), val = bool(false)]; + tensor var_3262 = concat(axis = var_3261, interleave = var_3262_interleave_0, values = (var_3259, var_3257_0))[name = string("op_3262")]; + tensor var_3263_cast_fp16 = mul(x = var_3262, y = sin_s)[name = string("op_3263_cast_fp16")]; + tensor q_79_cast_fp16 = add(x = var_3256_cast_fp16, y = var_3263_cast_fp16)[name = string("q_79_cast_fp16")]; + bool attn_weights_29_transpose_x_0 = const()[name = string("attn_weights_29_transpose_x_0"), val = bool(false)]; + bool attn_weights_29_transpose_y_0 = const()[name = string("attn_weights_29_transpose_y_0"), val = bool(false)]; + tensor attn_weights_29_cast_fp16 = matmul(transpose_x = attn_weights_29_transpose_x_0, transpose_y = attn_weights_29_transpose_y_0, x = q_79_cast_fp16, y = transpose_37_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask_sliding)[name = string("x_115_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_115_cast_fp16)[name = string("reduce_max_7")]; + tensor var_3295 = sub(x = x_115_cast_fp16, y = reduce_max_7)[name = string("op_3295")]; + tensor var_3301 = exp(x = var_3295)[name = string("op_3301")]; + tensor 
var_3311_axes_0 = const()[name = string("op_3311_axes_0"), val = tensor([-1])]; + bool var_3311_keep_dims_0 = const()[name = string("op_3311_keep_dims_0"), val = bool(true)]; + tensor var_3311 = reduce_sum(axes = var_3311_axes_0, keep_dims = var_3311_keep_dims_0, x = var_3301)[name = string("op_3311")]; + tensor var_3317_cast_fp16 = real_div(x = var_3301, y = var_3311)[name = string("op_3317_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = var_3317_cast_fp16, y = V_expanded_1_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_3328 = const()[name = string("op_3328"), val = tensor([0, 2, 1, 3])]; + tensor var_3335 = const()[name = string("op_3335"), val = tensor([1, 3, -1])]; + tensor var_3329_cast_fp16 = transpose(perm = var_3328, x = attn_output_43_cast_fp16)[name = string("transpose_20")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_3335, x = var_3329_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3340 = const()[name = string("op_3340"), val = tensor([0, 2, 1])]; + string var_3356_pad_type_0 = const()[name = string("op_3356_pad_type_0"), val = string("valid")]; + int32 var_3356_groups_0 = const()[name = string("op_3356_groups_0"), val = int32(1)]; + tensor var_3356_strides_0 = const()[name = string("op_3356_strides_0"), val = tensor([1])]; + tensor var_3356_pad_0 = const()[name = string("op_3356_pad_0"), val = tensor([0, 0])]; + tensor var_3356_dilations_0 = const()[name = string("op_3356_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(409418944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412040448))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3341_cast_fp16 = transpose(perm = var_3340, x = attn_output_45_cast_fp16)[name = string("transpose_19")]; + tensor var_3356_cast_fp16 = conv(dilations = var_3356_dilations_0, groups = var_3356_groups_0, pad = var_3356_pad_0, pad_type = var_3356_pad_type_0, strides = var_3356_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_3341_cast_fp16)[name = string("op_3356_cast_fp16")]; + tensor var_3360 = const()[name = string("op_3360"), val = tensor([0, 2, 1])]; + int32 var_3366 = const()[name = string("op_3366"), val = int32(-1)]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_119_cast_fp16 = transpose(perm = var_3360, x = var_3356_cast_fp16)[name = string("transpose_18")]; + tensor var_3368_cast_fp16 = mul(x = x_119_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3368_cast_fp16")]; + bool input_177_interleave_0 = const()[name = string("input_177_interleave_0"), val = bool(false)]; + tensor input_177_cast_fp16 = concat(axis = var_3366, interleave = input_177_interleave_0, values = (x_119_cast_fp16, var_3368_cast_fp16))[name = string("input_177_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_3363_to_fp16 = const()[name = string("op_3363_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_3363_to_fp16, x = input_177_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor var_3373_split_sizes_0 = const()[name = string("op_3373_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3373_axis_0 = const()[name = string("op_3373_axis_0"), val = int32(-1)]; + tensor var_3373_cast_fp16_0, tensor var_3373_cast_fp16_1 = split(axis = 
var_3373_axis_0, split_sizes = var_3373_split_sizes_0, x = normed_177_cast_fp16)[name = string("op_3373_cast_fp16")]; + tensor layers_7_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412043072)))]; + tensor attn_output_47_cast_fp16 = mul(x = var_3373_cast_fp16_0, y = layers_7_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_47_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_111_cast_fp16, y = attn_output_47_cast_fp16)[name = string("x_121_cast_fp16")]; + int32 var_3382 = const()[name = string("op_3382"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3384_cast_fp16 = mul(x = x_121_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_3384_cast_fp16")]; + bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; + tensor input_179_cast_fp16 = concat(axis = var_3382, interleave = input_179_interleave_0, values = (x_121_cast_fp16, var_3384_cast_fp16))[name = string("input_179_cast_fp16")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_3379_to_fp16 = const()[name = string("op_3379_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_3379_to_fp16, x = input_179_cast_fp16)[name = string("normed_181_cast_fp16")]; + tensor var_3389_split_sizes_0 = const()[name = string("op_3389_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3389_axis_0 = const()[name = string("op_3389_axis_0"), val = int32(-1)]; + tensor var_3389_cast_fp16_0, tensor var_3389_cast_fp16_1 = split(axis = var_3389_axis_0, split_sizes = var_3389_split_sizes_0, x = normed_181_cast_fp16)[name = string("op_3389_cast_fp16")]; + tensor 
layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412048256)))]; + tensor h_45_cast_fp16 = mul(x = var_3389_cast_fp16_0, y = layers_7_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_45_cast_fp16")]; + tensor var_3400 = const()[name = string("op_3400"), val = tensor([0, 2, 1])]; + tensor input_181_axes_0 = const()[name = string("input_181_axes_0"), val = tensor([2])]; + tensor var_3401 = transpose(perm = var_3400, x = h_45_cast_fp16)[name = string("transpose_17")]; + tensor input_181 = expand_dims(axes = input_181_axes_0, x = var_3401)[name = string("input_181")]; + string gate_29_pad_type_0 = const()[name = string("gate_29_pad_type_0"), val = string("valid")]; + tensor gate_29_strides_0 = const()[name = string("gate_29_strides_0"), val = tensor([1, 1])]; + tensor gate_29_pad_0 = const()[name = string("gate_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_29_dilations_0 = const()[name = string("gate_29_dilations_0"), val = tensor([1, 1])]; + int32 gate_29_groups_0 = const()[name = string("gate_29_groups_0"), val = int32(1)]; + tensor gate_29 = conv(dilations = gate_29_dilations_0, groups = gate_29_groups_0, pad = gate_29_pad_0, pad_type = gate_29_pad_type_0, strides = gate_29_strides_0, weight = layers_7_mlp_gate_proj_weight_palettized, x = input_181)[name = string("gate_29")]; + string up_15_pad_type_0 = const()[name = string("up_15_pad_type_0"), val = string("valid")]; + tensor up_15_strides_0 = const()[name = string("up_15_strides_0"), val = tensor([1, 1])]; + tensor up_15_pad_0 = const()[name = string("up_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_15_dilations_0 = const()[name = string("up_15_dilations_0"), val = tensor([1, 1])]; + int32 up_15_groups_0 = const()[name = string("up_15_groups_0"), val = int32(1)]; + tensor up_15 = conv(dilations = 
up_15_dilations_0, groups = up_15_groups_0, pad = up_15_pad_0, pad_type = up_15_pad_type_0, strides = up_15_strides_0, weight = layers_7_mlp_up_proj_weight_palettized, x = input_181)[name = string("up_15")]; + string gate_31_mode_0 = const()[name = string("gate_31_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate_31 = gelu(mode = gate_31_mode_0, x = gate_29)[name = string("gate_31")]; + tensor input_183 = mul(x = gate_31, y = up_15)[name = string("input_183")]; + string mlp_out_15_pad_type_0 = const()[name = string("mlp_out_15_pad_type_0"), val = string("valid")]; + tensor mlp_out_15_strides_0 = const()[name = string("mlp_out_15_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_15_pad_0 = const()[name = string("mlp_out_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_15_dilations_0 = const()[name = string("mlp_out_15_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_15_groups_0 = const()[name = string("mlp_out_15_groups_0"), val = int32(1)]; + tensor mlp_out_15 = conv(dilations = mlp_out_15_dilations_0, groups = mlp_out_15_groups_0, pad = mlp_out_15_pad_0, pad_type = mlp_out_15_pad_type_0, strides = mlp_out_15_strides_0, weight = layers_7_mlp_down_proj_weight_palettized, x = input_183)[name = string("mlp_out_15")]; + tensor var_3441_axes_0 = const()[name = string("op_3441_axes_0"), val = tensor([2])]; + tensor var_3441 = squeeze(axes = var_3441_axes_0, x = mlp_out_15)[name = string("op_3441")]; + tensor var_3445 = const()[name = string("op_3445"), val = tensor([0, 2, 1])]; + int32 var_3451 = const()[name = string("op_3451"), val = int32(-1)]; + fp16 const_61_promoted = const()[name = string("const_61_promoted"), val = fp16(-0x1p+0)]; + tensor x_123 = transpose(perm = var_3445, x = var_3441)[name = string("transpose_16")]; + tensor var_3453 = mul(x = x_123, y = const_61_promoted)[name = string("op_3453")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = 
var_3451, interleave = input_185_interleave_0, values = (x_123, var_3453))[name = string("input_185")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_3448_to_fp16 = const()[name = string("op_3448_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_3448_to_fp16, x = input_185)[name = string("normed_185_cast_fp16")]; + tensor var_3458_split_sizes_0 = const()[name = string("op_3458_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3458_axis_0 = const()[name = string("op_3458_axis_0"), val = int32(-1)]; + tensor var_3458_0, tensor var_3458_1 = split(axis = var_3458_axis_0, split_sizes = var_3458_split_sizes_0, x = normed_185_cast_fp16)[name = string("op_3458")]; + tensor hidden_states_73 = mul(x = var_3458_0, y = layers_7_post_feedforward_layernorm_weight)[name = string("hidden_states_73")]; + tensor hidden_states_75_cast_fp16 = add(x = x_121_cast_fp16, y = hidden_states_73)[name = string("hidden_states_75_cast_fp16")]; + tensor per_layer_slice_15_begin_0 = const()[name = string("per_layer_slice_15_begin_0"), val = tensor([0, 0, 10240])]; + tensor per_layer_slice_15_end_0 = const()[name = string("per_layer_slice_15_end_0"), val = tensor([1, 3, 10496])]; + tensor per_layer_slice_15_end_mask_0 = const()[name = string("per_layer_slice_15_end_mask_0"), val = tensor([true, true, false])]; + tensor per_layer_slice_15_cast_fp16 = slice_by_index(begin = per_layer_slice_15_begin_0, end = per_layer_slice_15_end_0, end_mask = per_layer_slice_15_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_15_cast_fp16")]; + tensor var_3486 = const()[name = string("op_3486"), val = tensor([0, 2, 1])]; + tensor input_187_axes_0 = const()[name = string("input_187_axes_0"), val = tensor([2])]; + tensor var_3487 = transpose(perm = var_3486, x = hidden_states_75_cast_fp16)[name = string("transpose_15")]; + tensor input_187 = expand_dims(axes = 
input_187_axes_0, x = var_3487)[name = string("input_187")]; + string gated_43_pad_type_0 = const()[name = string("gated_43_pad_type_0"), val = string("valid")]; + tensor gated_43_strides_0 = const()[name = string("gated_43_strides_0"), val = tensor([1, 1])]; + tensor gated_43_pad_0 = const()[name = string("gated_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_43_dilations_0 = const()[name = string("gated_43_dilations_0"), val = tensor([1, 1])]; + int32 gated_43_groups_0 = const()[name = string("gated_43_groups_0"), val = int32(1)]; + tensor gated_43 = conv(dilations = gated_43_dilations_0, groups = gated_43_groups_0, pad = gated_43_pad_0, pad_type = gated_43_pad_type_0, strides = gated_43_strides_0, weight = layers_7_per_layer_input_gate_weight_palettized, x = input_187)[name = string("gated_43")]; + string gated_45_mode_0 = const()[name = string("gated_45_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_45 = gelu(mode = gated_45_mode_0, x = gated_43)[name = string("gated_45")]; + tensor var_3506 = const()[name = string("op_3506"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_15_axes_0 = const()[name = string("per_layer_slice_conv_15_axes_0"), val = tensor([2])]; + tensor var_3507_cast_fp16 = transpose(perm = var_3506, x = per_layer_slice_15_cast_fp16)[name = string("transpose_14")]; + tensor per_layer_slice_conv_15_cast_fp16 = expand_dims(axes = per_layer_slice_conv_15_axes_0, x = var_3507_cast_fp16)[name = string("per_layer_slice_conv_15_cast_fp16")]; + tensor input_189_cast_fp16 = mul(x = gated_45, y = per_layer_slice_conv_15_cast_fp16)[name = string("input_189_cast_fp16")]; + string gated_47_pad_type_0 = const()[name = string("gated_47_pad_type_0"), val = string("valid")]; + tensor gated_47_strides_0 = const()[name = string("gated_47_strides_0"), val = tensor([1, 1])]; + tensor gated_47_pad_0 = const()[name = string("gated_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_47_dilations_0 = const()[name = 
string("gated_47_dilations_0"), val = tensor([1, 1])]; + int32 gated_47_groups_0 = const()[name = string("gated_47_groups_0"), val = int32(1)]; + tensor layers_7_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412053440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412381184))))[name = string("layers_7_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_47_cast_fp16 = conv(dilations = gated_47_dilations_0, groups = gated_47_groups_0, pad = gated_47_pad_0, pad_type = gated_47_pad_type_0, strides = gated_47_strides_0, weight = layers_7_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_189_cast_fp16)[name = string("gated_47_cast_fp16")]; + tensor var_3523_axes_0 = const()[name = string("op_3523_axes_0"), val = tensor([2])]; + tensor var_3523_cast_fp16 = squeeze(axes = var_3523_axes_0, x = gated_47_cast_fp16)[name = string("op_3523_cast_fp16")]; + tensor var_3527 = const()[name = string("op_3527"), val = tensor([0, 2, 1])]; + int32 var_3533 = const()[name = string("op_3533"), val = int32(-1)]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_125_cast_fp16 = transpose(perm = var_3527, x = var_3523_cast_fp16)[name = string("transpose_13")]; + tensor var_3535_cast_fp16 = mul(x = x_125_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_3535_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_3533, interleave = input_191_interleave_0, values = (x_125_cast_fp16, var_3535_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), 
val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_3530_to_fp16, x = input_191_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor var_3540_split_sizes_0 = const()[name = string("op_3540_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3540_axis_0 = const()[name = string("op_3540_axis_0"), val = int32(-1)]; + tensor var_3540_cast_fp16_0, tensor var_3540_cast_fp16_1 = split(axis = var_3540_axis_0, split_sizes = var_3540_split_sizes_0, x = normed_189_cast_fp16)[name = string("op_3540_cast_fp16")]; + tensor layers_7_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_7_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412383808)))]; + tensor hidden_states_79_cast_fp16 = mul(x = var_3540_cast_fp16_0, y = layers_7_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_79_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; + tensor const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor([0x1.9ep-1])]; + tensor x_127_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_63_promoted_to_fp16)[name = string("x_127_cast_fp16")]; + int32 var_3555 = const()[name = string("op_3555"), val = int32(-1)]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3557_cast_fp16 = mul(x = x_127_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_3557_cast_fp16")]; + bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; + tensor input_193_cast_fp16 = concat(axis = var_3555, interleave = input_193_interleave_0, values = (x_127_cast_fp16, var_3557_cast_fp16))[name = string("input_193_cast_fp16")]; + tensor 
normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_3552_to_fp16 = const()[name = string("op_3552_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_3552_to_fp16, x = input_193_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor var_3562_split_sizes_0 = const()[name = string("op_3562_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3562_axis_0 = const()[name = string("op_3562_axis_0"), val = int32(-1)]; + tensor var_3562_cast_fp16_0, tensor var_3562_cast_fp16_1 = split(axis = var_3562_axis_0, split_sizes = var_3562_split_sizes_0, x = normed_193_cast_fp16)[name = string("op_3562_cast_fp16")]; + tensor layers_8_input_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412388992)))]; + tensor h_49_cast_fp16 = mul(x = var_3562_cast_fp16_0, y = layers_8_input_layernorm_weight_promoted_to_fp16)[name = string("h_49_cast_fp16")]; + tensor var_3568 = const()[name = string("op_3568"), val = tensor([0, 2, 1])]; + tensor var_3571_axes_0 = const()[name = string("op_3571_axes_0"), val = tensor([2])]; + tensor var_3569_cast_fp16 = transpose(perm = var_3568, x = h_49_cast_fp16)[name = string("transpose_12")]; + tensor var_3571_cast_fp16 = expand_dims(axes = var_3571_axes_0, x = var_3569_cast_fp16)[name = string("op_3571_cast_fp16")]; + string q_81_pad_type_0 = const()[name = string("q_81_pad_type_0"), val = string("valid")]; + tensor q_81_strides_0 = const()[name = string("q_81_strides_0"), val = tensor([1, 1])]; + tensor q_81_pad_0 = const()[name = string("q_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor q_81_dilations_0 = const()[name = string("q_81_dilations_0"), val = tensor([1, 1])]; + int32 q_81_groups_0 = const()[name = string("q_81_groups_0"), val = int32(1)]; + tensor q_81 = conv(dilations = 
q_81_dilations_0, groups = q_81_groups_0, pad = q_81_pad_0, pad_type = q_81_pad_type_0, strides = q_81_strides_0, weight = layers_8_self_attn_q_proj_weight_palettized, x = var_3571_cast_fp16)[name = string("q_81")]; + tensor var_3592 = const()[name = string("op_3592"), val = tensor([1, 8, 512, 3])]; + tensor var_3593 = reshape(shape = var_3592, x = q_81)[name = string("op_3593")]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 3, 1, 2])]; + tensor var_3616 = const()[name = string("op_3616"), val = tensor([3, 8, 512])]; + tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = var_3593)[name = string("transpose_11")]; + tensor x_129 = reshape(shape = var_3616, x = transpose_52)[name = string("x_129")]; + int32 var_3622 = const()[name = string("op_3622"), val = int32(-1)]; + fp16 const_65_promoted = const()[name = string("const_65_promoted"), val = fp16(-0x1p+0)]; + tensor var_3624 = mul(x = x_129, y = const_65_promoted)[name = string("op_3624")]; + bool input_197_interleave_0 = const()[name = string("input_197_interleave_0"), val = bool(false)]; + tensor input_197 = concat(axis = var_3622, interleave = input_197_interleave_0, values = (x_129, var_3624))[name = string("input_197")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_3619_to_fp16 = const()[name = string("op_3619_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_3619_to_fp16, x = input_197)[name = string("normed_197_cast_fp16")]; + tensor var_3629_split_sizes_0 = const()[name = string("op_3629_split_sizes_0"), val = tensor([512, 512])]; + int32 var_3629_axis_0 = const()[name = string("op_3629_axis_0"), val = int32(-1)]; + tensor var_3629_0, tensor var_3629_1 = split(axis = var_3629_axis_0, split_sizes = var_3629_split_sizes_0, x = normed_197_cast_fp16)[name = string("op_3629")]; + tensor q_85 = mul(x = var_3629_0, y = 
layers_2_self_attn_q_norm_weight)[name = string("q_85")]; + tensor var_3636 = const()[name = string("op_3636"), val = tensor([1, 3, 8, 512])]; + tensor var_3637 = reshape(shape = var_3636, x = q_85)[name = string("op_3637")]; + tensor var_3642 = const()[name = string("op_3642"), val = tensor([0, 2, 1, 3])]; + tensor q_87 = transpose(perm = var_3642, x = var_3637)[name = string("transpose_10")]; + tensor var_3644_cast_fp16 = mul(x = q_87, y = cos_f)[name = string("op_3644_cast_fp16")]; + tensor var_3645_split_sizes_0 = const()[name = string("op_3645_split_sizes_0"), val = tensor([256, 256])]; + int32 var_3645_axis_0 = const()[name = string("op_3645_axis_0"), val = int32(-1)]; + tensor var_3645_0, tensor var_3645_1 = split(axis = var_3645_axis_0, split_sizes = var_3645_split_sizes_0, x = q_87)[name = string("op_3645")]; + fp16 const_66_promoted = const()[name = string("const_66_promoted"), val = fp16(-0x1p+0)]; + tensor var_3647 = mul(x = var_3645_1, y = const_66_promoted)[name = string("op_3647")]; + int32 var_3649 = const()[name = string("op_3649"), val = int32(-1)]; + bool var_3650_interleave_0 = const()[name = string("op_3650_interleave_0"), val = bool(false)]; + tensor var_3650 = concat(axis = var_3649, interleave = var_3650_interleave_0, values = (var_3647, var_3645_0))[name = string("op_3650")]; + tensor var_3651_cast_fp16 = mul(x = var_3650, y = sin_f)[name = string("op_3651_cast_fp16")]; + tensor q_cast_fp16 = add(x = var_3644_cast_fp16, y = var_3651_cast_fp16)[name = string("q_cast_fp16")]; + bool attn_weights_33_transpose_x_0 = const()[name = string("attn_weights_33_transpose_x_0"), val = bool(false)]; + bool attn_weights_33_transpose_y_0 = const()[name = string("attn_weights_33_transpose_y_0"), val = bool(false)]; + tensor attn_weights_33_cast_fp16 = matmul(transpose_x = attn_weights_33_transpose_x_0, transpose_y = attn_weights_33_transpose_y_0, x = q_cast_fp16, y = transpose_41_cast_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor 
x_131_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask_full)[name = string("x_131_cast_fp16")]; + tensor reduce_max_8_axes_0 = const()[name = string("reduce_max_8_axes_0"), val = tensor([-1])]; + bool reduce_max_8_keep_dims_0 = const()[name = string("reduce_max_8_keep_dims_0"), val = bool(true)]; + tensor reduce_max_8 = reduce_max(axes = reduce_max_8_axes_0, keep_dims = reduce_max_8_keep_dims_0, x = x_131_cast_fp16)[name = string("reduce_max_8")]; + tensor var_3683 = sub(x = x_131_cast_fp16, y = reduce_max_8)[name = string("op_3683")]; + tensor var_3689 = exp(x = var_3683)[name = string("op_3689")]; + tensor var_3699_axes_0 = const()[name = string("op_3699_axes_0"), val = tensor([-1])]; + bool var_3699_keep_dims_0 = const()[name = string("op_3699_keep_dims_0"), val = bool(true)]; + tensor var_3699 = reduce_sum(axes = var_3699_axes_0, keep_dims = var_3699_keep_dims_0, x = var_3689)[name = string("op_3699")]; + tensor var_3705_cast_fp16 = real_div(x = var_3689, y = var_3699)[name = string("op_3705_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_3705_cast_fp16, y = V_expanded_5_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_3716 = const()[name = string("op_3716"), val = tensor([0, 2, 1, 3])]; + tensor var_3723 = const()[name = string("op_3723"), val = tensor([1, 3, -1])]; + tensor var_3717_cast_fp16 = transpose(perm = var_3716, x = attn_output_49_cast_fp16)[name = string("transpose_9")]; + tensor attn_output_51_cast_fp16 = reshape(shape = var_3723, x = var_3717_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_3728 = const()[name = string("op_3728"), val = tensor([0, 2, 1])]; + string 
var_3744_pad_type_0 = const()[name = string("op_3744_pad_type_0"), val = string("valid")]; + int32 var_3744_groups_0 = const()[name = string("op_3744_groups_0"), val = int32(1)]; + tensor var_3744_strides_0 = const()[name = string("op_3744_strides_0"), val = tensor([1])]; + tensor var_3744_pad_0 = const()[name = string("op_3744_pad_0"), val = tensor([0, 0])]; + tensor var_3744_dilations_0 = const()[name = string("op_3744_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412394176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417637120))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3729_cast_fp16 = transpose(perm = var_3728, x = attn_output_51_cast_fp16)[name = string("transpose_8")]; + tensor var_3744_cast_fp16 = conv(dilations = var_3744_dilations_0, groups = var_3744_groups_0, pad = var_3744_pad_0, pad_type = var_3744_pad_type_0, strides = var_3744_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_3729_cast_fp16)[name = string("op_3744_cast_fp16")]; + tensor var_3748 = const()[name = string("op_3748"), val = tensor([0, 2, 1])]; + int32 var_3754 = const()[name = string("op_3754"), val = int32(-1)]; + fp16 const_67_promoted_to_fp16 = const()[name = string("const_67_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_135_cast_fp16 = transpose(perm = var_3748, x = var_3744_cast_fp16)[name = string("transpose_7")]; + tensor var_3756_cast_fp16 = mul(x = x_135_cast_fp16, y = const_67_promoted_to_fp16)[name = string("op_3756_cast_fp16")]; + bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; + tensor input_201_cast_fp16 = concat(axis = var_3754, interleave = input_201_interleave_0, values = (x_135_cast_fp16, var_3756_cast_fp16))[name = 
string("input_201_cast_fp16")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_3751_to_fp16 = const()[name = string("op_3751_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_3751_to_fp16, x = input_201_cast_fp16)[name = string("normed_201_cast_fp16")]; + tensor var_3761_split_sizes_0 = const()[name = string("op_3761_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3761_axis_0 = const()[name = string("op_3761_axis_0"), val = int32(-1)]; + tensor var_3761_cast_fp16_0, tensor var_3761_cast_fp16_1 = split(axis = var_3761_axis_0, split_sizes = var_3761_split_sizes_0, x = normed_201_cast_fp16)[name = string("op_3761_cast_fp16")]; + tensor layers_8_post_attention_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417639744)))]; + tensor attn_output_cast_fp16 = mul(x = var_3761_cast_fp16_0, y = layers_8_post_attention_layernorm_weight_promoted_to_fp16)[name = string("attn_output_cast_fp16")]; + tensor x_137_cast_fp16 = add(x = x_127_cast_fp16, y = attn_output_cast_fp16)[name = string("x_137_cast_fp16")]; + int32 var_3770 = const()[name = string("op_3770"), val = int32(-1)]; + fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3772_cast_fp16 = mul(x = x_137_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_3772_cast_fp16")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203_cast_fp16 = concat(axis = var_3770, interleave = input_203_interleave_0, values = (x_137_cast_fp16, var_3772_cast_fp16))[name = string("input_203_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 
var_3767_to_fp16 = const()[name = string("op_3767_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_3767_to_fp16, x = input_203_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor var_3777_split_sizes_0 = const()[name = string("op_3777_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3777_axis_0 = const()[name = string("op_3777_axis_0"), val = int32(-1)]; + tensor var_3777_cast_fp16_0, tensor var_3777_cast_fp16_1 = split(axis = var_3777_axis_0, split_sizes = var_3777_split_sizes_0, x = normed_205_cast_fp16)[name = string("op_3777_cast_fp16")]; + tensor layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16 = const()[name = string("layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417644928)))]; + tensor h_51_cast_fp16 = mul(x = var_3777_cast_fp16_0, y = layers_8_pre_feedforward_layernorm_weight_promoted_to_fp16)[name = string("h_51_cast_fp16")]; + tensor var_3788 = const()[name = string("op_3788"), val = tensor([0, 2, 1])]; + tensor input_205_axes_0 = const()[name = string("input_205_axes_0"), val = tensor([2])]; + tensor var_3789 = transpose(perm = var_3788, x = h_51_cast_fp16)[name = string("transpose_6")]; + tensor input_205 = expand_dims(axes = input_205_axes_0, x = var_3789)[name = string("input_205")]; + string gate_33_pad_type_0 = const()[name = string("gate_33_pad_type_0"), val = string("valid")]; + tensor gate_33_strides_0 = const()[name = string("gate_33_strides_0"), val = tensor([1, 1])]; + tensor gate_33_pad_0 = const()[name = string("gate_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gate_33_dilations_0 = const()[name = string("gate_33_dilations_0"), val = tensor([1, 1])]; + int32 gate_33_groups_0 = const()[name = string("gate_33_groups_0"), val = int32(1)]; + tensor gate_33 = conv(dilations = gate_33_dilations_0, groups = gate_33_groups_0, pad = gate_33_pad_0, 
pad_type = gate_33_pad_type_0, strides = gate_33_strides_0, weight = layers_8_mlp_gate_proj_weight_palettized, x = input_205)[name = string("gate_33")]; + string up_pad_type_0 = const()[name = string("up_pad_type_0"), val = string("valid")]; + tensor up_strides_0 = const()[name = string("up_strides_0"), val = tensor([1, 1])]; + tensor up_pad_0 = const()[name = string("up_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_dilations_0 = const()[name = string("up_dilations_0"), val = tensor([1, 1])]; + int32 up_groups_0 = const()[name = string("up_groups_0"), val = int32(1)]; + tensor up = conv(dilations = up_dilations_0, groups = up_groups_0, pad = up_pad_0, pad_type = up_pad_type_0, strides = up_strides_0, weight = layers_8_mlp_up_proj_weight_palettized, x = input_205)[name = string("up")]; + string gate_mode_0 = const()[name = string("gate_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gate = gelu(mode = gate_mode_0, x = gate_33)[name = string("gate")]; + tensor input_207 = mul(x = gate, y = up)[name = string("input_207")]; + string mlp_out_pad_type_0 = const()[name = string("mlp_out_pad_type_0"), val = string("valid")]; + tensor mlp_out_strides_0 = const()[name = string("mlp_out_strides_0"), val = tensor([1, 1])]; + tensor mlp_out_pad_0 = const()[name = string("mlp_out_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor mlp_out_dilations_0 = const()[name = string("mlp_out_dilations_0"), val = tensor([1, 1])]; + int32 mlp_out_groups_0 = const()[name = string("mlp_out_groups_0"), val = int32(1)]; + tensor mlp_out = conv(dilations = mlp_out_dilations_0, groups = mlp_out_groups_0, pad = mlp_out_pad_0, pad_type = mlp_out_pad_type_0, strides = mlp_out_strides_0, weight = layers_8_mlp_down_proj_weight_palettized, x = input_207)[name = string("mlp_out")]; + tensor var_3829_axes_0 = const()[name = string("op_3829_axes_0"), val = tensor([2])]; + tensor var_3829 = squeeze(axes = var_3829_axes_0, x = mlp_out)[name = string("op_3829")]; + tensor var_3833 = const()[name = 
string("op_3833"), val = tensor([0, 2, 1])]; + int32 var_3839 = const()[name = string("op_3839"), val = int32(-1)]; + fp16 const_69_promoted = const()[name = string("const_69_promoted"), val = fp16(-0x1p+0)]; + tensor x_139 = transpose(perm = var_3833, x = var_3829)[name = string("transpose_5")]; + tensor var_3841 = mul(x = x_139, y = const_69_promoted)[name = string("op_3841")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209 = concat(axis = var_3839, interleave = input_209_interleave_0, values = (x_139, var_3841))[name = string("input_209")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_3836_to_fp16 = const()[name = string("op_3836_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_3836_to_fp16, x = input_209)[name = string("normed_209_cast_fp16")]; + tensor var_3846_split_sizes_0 = const()[name = string("op_3846_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3846_axis_0 = const()[name = string("op_3846_axis_0"), val = int32(-1)]; + tensor var_3846_0, tensor var_3846_1 = split(axis = var_3846_axis_0, split_sizes = var_3846_split_sizes_0, x = normed_209_cast_fp16)[name = string("op_3846")]; + tensor hidden_states_83 = mul(x = var_3846_0, y = layers_8_post_feedforward_layernorm_weight)[name = string("hidden_states_83")]; + tensor hidden_states_85_cast_fp16 = add(x = x_137_cast_fp16, y = hidden_states_83)[name = string("hidden_states_85_cast_fp16")]; + tensor per_layer_slice_begin_0 = const()[name = string("per_layer_slice_begin_0"), val = tensor([0, 0, 10496])]; + tensor per_layer_slice_end_0 = const()[name = string("per_layer_slice_end_0"), val = tensor([1, 3, 1])]; + tensor per_layer_slice_end_mask_0 = const()[name = string("per_layer_slice_end_mask_0"), val = tensor([true, true, true])]; + tensor per_layer_slice_cast_fp16 = slice_by_index(begin = 
per_layer_slice_begin_0, end = per_layer_slice_end_0, end_mask = per_layer_slice_end_mask_0, x = per_layer_combined)[name = string("per_layer_slice_cast_fp16")]; + tensor var_3874 = const()[name = string("op_3874"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_3875 = transpose(perm = var_3874, x = hidden_states_85_cast_fp16)[name = string("transpose_4")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_3875)[name = string("input_211")]; + string gated_49_pad_type_0 = const()[name = string("gated_49_pad_type_0"), val = string("valid")]; + tensor gated_49_strides_0 = const()[name = string("gated_49_strides_0"), val = tensor([1, 1])]; + tensor gated_49_pad_0 = const()[name = string("gated_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_49_dilations_0 = const()[name = string("gated_49_dilations_0"), val = tensor([1, 1])]; + int32 gated_49_groups_0 = const()[name = string("gated_49_groups_0"), val = int32(1)]; + tensor gated_49 = conv(dilations = gated_49_dilations_0, groups = gated_49_groups_0, pad = gated_49_pad_0, pad_type = gated_49_pad_type_0, strides = gated_49_strides_0, weight = layers_8_per_layer_input_gate_weight_palettized, x = input_211)[name = string("gated_49")]; + string gated_51_mode_0 = const()[name = string("gated_51_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor gated_51 = gelu(mode = gated_51_mode_0, x = gated_49)[name = string("gated_51")]; + tensor var_3894 = const()[name = string("op_3894"), val = tensor([0, 2, 1])]; + tensor per_layer_slice_conv_axes_0 = const()[name = string("per_layer_slice_conv_axes_0"), val = tensor([2])]; + tensor var_3895_cast_fp16 = transpose(perm = var_3894, x = per_layer_slice_cast_fp16)[name = string("transpose_3")]; + tensor per_layer_slice_conv_cast_fp16 = expand_dims(axes = per_layer_slice_conv_axes_0, x = var_3895_cast_fp16)[name = string("per_layer_slice_conv_cast_fp16")]; + tensor 
input_213_cast_fp16 = mul(x = gated_51, y = per_layer_slice_conv_cast_fp16)[name = string("input_213_cast_fp16")]; + string gated_pad_type_0 = const()[name = string("gated_pad_type_0"), val = string("valid")]; + tensor gated_strides_0 = const()[name = string("gated_strides_0"), val = tensor([1, 1])]; + tensor gated_pad_0 = const()[name = string("gated_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor gated_dilations_0 = const()[name = string("gated_dilations_0"), val = tensor([1, 1])]; + int32 gated_groups_0 = const()[name = string("gated_groups_0"), val = int32(1)]; + tensor layers_8_per_layer_projection_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417650112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417977856))))[name = string("layers_8_per_layer_projection_weight_promoted_to_fp16_palettized")]; + tensor gated_cast_fp16 = conv(dilations = gated_dilations_0, groups = gated_groups_0, pad = gated_pad_0, pad_type = gated_pad_type_0, strides = gated_strides_0, weight = layers_8_per_layer_projection_weight_promoted_to_fp16_palettized, x = input_213_cast_fp16)[name = string("gated_cast_fp16")]; + tensor var_3911_axes_0 = const()[name = string("op_3911_axes_0"), val = tensor([2])]; + tensor var_3911_cast_fp16 = squeeze(axes = var_3911_axes_0, x = gated_cast_fp16)[name = string("op_3911_cast_fp16")]; + tensor var_3915 = const()[name = string("op_3915"), val = tensor([0, 2, 1])]; + int32 var_3921 = const()[name = string("op_3921"), val = int32(-1)]; + fp16 const_70_promoted_to_fp16 = const()[name = string("const_70_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor x_141_cast_fp16 = transpose(perm = var_3915, x = var_3911_cast_fp16)[name = string("transpose_2")]; + tensor var_3923_cast_fp16 = mul(x = x_141_cast_fp16, y = const_70_promoted_to_fp16)[name = string("op_3923_cast_fp16")]; + bool input_215_interleave_0 = 
const()[name = string("input_215_interleave_0"), val = bool(false)]; + tensor input_215_cast_fp16 = concat(axis = var_3921, interleave = input_215_interleave_0, values = (x_141_cast_fp16, var_3923_cast_fp16))[name = string("input_215_cast_fp16")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_3918_to_fp16 = const()[name = string("op_3918_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_3918_to_fp16, x = input_215_cast_fp16)[name = string("normed_213_cast_fp16")]; + tensor var_3928_split_sizes_0 = const()[name = string("op_3928_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3928_axis_0 = const()[name = string("op_3928_axis_0"), val = int32(-1)]; + tensor var_3928_cast_fp16_0, tensor var_3928_cast_fp16_1 = split(axis = var_3928_axis_0, split_sizes = var_3928_split_sizes_0, x = normed_213_cast_fp16)[name = string("op_3928_cast_fp16")]; + tensor layers_8_post_per_layer_input_norm_weight_promoted_to_fp16 = const()[name = string("layers_8_post_per_layer_input_norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417980480)))]; + tensor hidden_states_89_cast_fp16 = mul(x = var_3928_cast_fp16_0, y = layers_8_post_per_layer_input_norm_weight_promoted_to_fp16)[name = string("hidden_states_89_cast_fp16")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_85_cast_fp16, y = hidden_states_89_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = tensor([0x1.c8p-2])]; + tensor hidden_states_out = mul(x = hidden_states_cast_fp16, y = const_71_promoted_to_fp16)[name = string("x_cast_fp16")]; + int32 var_3943 = const()[name = string("op_3943"), val = int32(-1)]; + fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor 
var_3945_cast_fp16 = mul(x = hidden_states_out, y = const_72_promoted_to_fp16)[name = string("op_3945_cast_fp16")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217_cast_fp16 = concat(axis = var_3943, interleave = input_217_interleave_0, values = (hidden_states_out, var_3945_cast_fp16))[name = string("input_217_cast_fp16")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_3940_to_fp16 = const()[name = string("op_3940_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_3940_to_fp16, x = input_217_cast_fp16)[name = string("normed_217_cast_fp16")]; + tensor var_3950_split_sizes_0 = const()[name = string("op_3950_split_sizes_0"), val = tensor([2560, 2560])]; + int32 var_3950_axis_0 = const()[name = string("op_3950_axis_0"), val = int32(-1)]; + tensor var_3950_cast_fp16_0, tensor var_3950_cast_fp16_1 = split(axis = var_3950_axis_0, split_sizes = var_3950_split_sizes_0, x = normed_217_cast_fp16)[name = string("op_3950_cast_fp16")]; + tensor norm_weight_promoted_to_fp16 = const()[name = string("norm_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417985664)))]; + tensor normed_221_cast_fp16 = mul(x = var_3950_cast_fp16_0, y = norm_weight_promoted_to_fp16)[name = string("normed_221_cast_fp16")]; + tensor var_3961 = const()[name = string("op_3961"), val = tensor([0, 2, 1])]; + tensor squeeze_9_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417990848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(753535232))))[name = string("squeeze_9_palettized")]; + string var_3977_pad_type_0 = const()[name = string("op_3977_pad_type_0"), val = string("valid")]; + int32 var_3977_groups_0 = const()[name = 
string("op_3977_groups_0"), val = int32(1)]; + tensor var_3977_strides_0 = const()[name = string("op_3977_strides_0"), val = tensor([1])]; + tensor var_3977_pad_0 = const()[name = string("op_3977_pad_0"), val = tensor([0, 0])]; + tensor var_3977_dilations_0 = const()[name = string("op_3977_dilations_0"), val = tensor([1])]; + tensor var_3962 = transpose(perm = var_3961, x = normed_221_cast_fp16)[name = string("transpose_1")]; + tensor var_3977 = conv(dilations = var_3977_dilations_0, groups = var_3977_groups_0, pad = var_3977_pad_0, pad_type = var_3977_pad_type_0, strides = var_3977_strides_0, weight = squeeze_9_palettized, x = var_3962)[name = string("op_3977")]; + tensor var_3981 = const()[name = string("op_3981"), val = tensor([0, 2, 1])]; + fp16 _inversed_3984_y_0_to_fp16 = const()[name = string("_inversed_3984_y_0_to_fp16"), val = fp16(0x1.11p-5)]; + tensor logits_1 = transpose(perm = var_3981, x = var_3977)[name = string("transpose_0")]; + tensor _inversed_3984_cast_fp16 = mul(x = logits_1, y = _inversed_3984_y_0_to_fp16)[name = string("_inversed_3984_cast_fp16")]; + tensor var_3985_cast_fp16 = tanh(x = _inversed_3984_cast_fp16)[name = string("op_3985_cast_fp16")]; + fp16 var_3986_to_fp16 = const()[name = string("op_3986_to_fp16"), val = fp16(0x1.ep+4)]; + tensor logits_cast_fp16 = mul(x = var_3985_cast_fp16, y = var_3986_to_fp16)[name = string("logits_cast_fp16")]; + int32 var_3990_axis_0 = const()[name = string("op_3990_axis_0"), val = int32(-1)]; + bool var_3990_keep_dims_0 = const()[name = string("op_3990_keep_dims_0"), val = bool(false)]; + string var_3990_output_dtype_0 = const()[name = string("op_3990_output_dtype_0"), val = string("int32")]; + tensor token_ids = reduce_argmax(axis = var_3990_axis_0, keep_dims = var_3990_keep_dims_0, output_dtype = var_3990_output_dtype_0, x = logits_cast_fp16)[name = string("op_3990_cast_fp16")]; + } -> (token_ids, hidden_states_out); +} \ No newline at end of file diff --git a/chunk4.mlmodelc/weights/weight.bin 
b/chunk4.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..adcac95f1a12061f4deb5d464d08365bc361a40d --- /dev/null +++ b/chunk4.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f044d109750fec5d781baada3c070cd3d524b674ea68a00c1c99b5e8015cfbb +size 753797440 diff --git a/cos_full.npy b/cos_full.npy new file mode 100644 index 0000000000000000000000000000000000000000..058ad65d525c836881e2aa531644304f1a3bafdb --- /dev/null +++ b/cos_full.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:767b3a21305a67e3a3fd22e256f2e7385014b32374442b6103fb820c7d9ef1fc +size 4194432 diff --git a/cos_sliding.npy b/cos_sliding.npy new file mode 100644 index 0000000000000000000000000000000000000000..81b6cd936a9988f2372f611d346b9efc6056811b --- /dev/null +++ b/cos_sliding.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27afac2d0282008c59736cb498f0b49e6f775e0b3847811fdd06be09c6df4a1 +size 2097280 diff --git a/embed_proj_weight.npy b/embed_proj_weight.npy new file mode 100644 index 0000000000000000000000000000000000000000..9db21521929994c12b8541a2c09340b5f5b04ff9 --- /dev/null +++ b/embed_proj_weight.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cae010a79030ee666136bf5317af76019a87a686bc93947ca1e20535f4a9109 +size 7864448 diff --git a/embed_tokens_per_layer_q8.bin b/embed_tokens_per_layer_q8.bin new file mode 100644 index 0000000000000000000000000000000000000000..5057e698d18cb2404767c1285cbefbec61dfc2fc --- /dev/null +++ b/embed_tokens_per_layer_q8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269eb54aa366e2d73474d7521b376025fa53bd2d72c6d3bc0301b0882c6ae681 +size 2818572288 diff --git a/embed_tokens_per_layer_scales.bin b/embed_tokens_per_layer_scales.bin new file mode 100644 index 0000000000000000000000000000000000000000..104df79461e2090edb9c517f9210d6928120bf40 --- /dev/null +++ 
b/embed_tokens_per_layer_scales.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc32925fb678b96d96cf804b77c1d137f157a6136b76e6f8a003d69f4e976fea +size 524288 diff --git a/embed_tokens_q8.bin b/embed_tokens_q8.bin new file mode 100644 index 0000000000000000000000000000000000000000..87966ac230eb189839f50f90764b79b584c7e24a --- /dev/null +++ b/embed_tokens_q8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8921fcbb6d8a79c7b304b929f357cdb41905fec75a90bad00dfed071c76fb82 +size 671088640 diff --git a/embed_tokens_scales.bin b/embed_tokens_scales.bin new file mode 100644 index 0000000000000000000000000000000000000000..5da6d354c44eb2197c2445ecd48a51ce866b6c95 --- /dev/null +++ b/embed_tokens_scales.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c20211cbfbb5e5a1059d91c5c1e0493e7630505028459e7843a9b1b41ee854 +size 524288 diff --git a/hf_model/config.json b/hf_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d68960fdcce766f2bfe41436325a8a483a74d125 --- /dev/null +++ b/hf_model/config.json @@ -0,0 +1,197 @@ +{ + "architectures": [ + "Gemma4ForConditionalGeneration" + ], + "audio_config": { + "_name_or_path": "", + "architectures": null, + "attention_chunk_size": 12, + "attention_context_left": 13, + "attention_context_right": 0, + "attention_invalid_logits_value": -1000000000.0, + "attention_logit_cap": 50.0, + "chunk_size_feed_forward": 0, + "conv_kernel_size": 5, + "dtype": "bfloat16", + "gradient_clipping": 10000000000.0, + "hidden_act": "silu", + "hidden_size": 1024, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_range": 0.02, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_type": "gemma4_audio", + "num_attention_heads": 8, + "num_hidden_layers": 12, + "output_attentions": false, + "output_hidden_states": false, + "output_proj_dims": 1536, + "problem_type": null, + 
"residual_weight": 0.5, + "return_dict": true, + "rms_norm_eps": 1e-06, + "subsampling_conv_channels": [ + 128, + 32 + ], + "use_clipped_linears": true + }, + "audio_token_id": 258881, + "boa_token_id": 256000, + "boi_token_id": 255999, + "dtype": "bfloat16", + "eoa_token_id": 258883, + "eoa_token_index": 258883, + "eoi_token_id": 258882, + "eos_token_id": [ + 1, + 106 + ], + "image_token_id": 258880, + "initializer_range": 0.02, + "model_type": "gemma4", + "text_config": { + "attention_bias": false, + "attention_dropout": 0.0, + "attention_k_eq_v": false, + "bos_token_id": 2, + "dtype": "bfloat16", + "enable_moe_block": false, + "eos_token_id": 1, + "expert_intermediate_size": null, + "final_logit_softcapping": 30.0, + "global_head_dim": 512, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2560, + "hidden_size_per_layer_input": 256, + "initializer_range": 0.02, + "intermediate_size": 10240, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma4_text", + "num_attention_heads": 
8, + "num_experts": null, + "num_global_key_value_heads": null, + "num_hidden_layers": 42, + "num_key_value_heads": 2, + "num_kv_shared_layers": 18, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "full_attention": { + "partial_rotary_factor": 0.25, + "rope_theta": 1000000.0, + "rope_type": "proportional" + }, + "sliding_attention": { + "rope_theta": 10000.0, + "rope_type": "default" + } + }, + "sliding_window": 512, + "tie_word_embeddings": true, + "top_k_experts": null, + "use_bidirectional_attention": null, + "use_cache": true, + "use_double_wide_mlp": false, + "vocab_size": 262144, + "vocab_size_per_layer_input": 262144 + }, + "tie_word_embeddings": true, + "transformers_version": "5.5.0.dev0", + "video_token_id": 258884, + "vision_config": { + "_name_or_path": "", + "architectures": null, + "attention_bias": false, + "attention_dropout": 0.0, + "chunk_size_feed_forward": 0, + "default_output_length": 280, + "dtype": "bfloat16", + "global_head_dim": 64, + "head_dim": 64, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "max_position_embeddings": 131072, + "model_type": "gemma4_vision", + "num_attention_heads": 12, + "num_hidden_layers": 16, + "num_key_value_heads": 12, + "output_attentions": false, + "output_hidden_states": false, + "patch_size": 16, + "pooling_kernel_size": 3, + "position_embedding_size": 10240, + "problem_type": null, + "return_dict": true, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 100.0, + "rope_type": "default" + }, + "standardize": false, + "use_clipped_linears": true + }, + "vision_soft_tokens_per_image": 280 +} diff --git a/hf_model/generation_config.json b/hf_model/generation_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..e605bb4523b1462ea9d9a3810b9e3ecf7ab7b1f6 --- /dev/null +++ b/hf_model/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 2, + "do_sample": true, + "eos_token_id": [ + 1, + 106, + 50 + ], + "pad_token_id": 0, + "temperature": 1.0, + "top_k": 64, + "top_p": 0.95, + "transformers_version": "5.5.0.dev0" +} diff --git a/hf_model/tokenizer.json b/hf_model/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1ff9f3e3439a939b971f9919e821bf87e835a503 --- /dev/null +++ b/hf_model/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f +size 32169626 diff --git a/hf_model/tokenizer_config.json b/hf_model/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..375b25dc8be85705251e41be1c25310d24932051 --- /dev/null +++ b/hf_model/tokenizer_config.json @@ -0,0 +1,74 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "response_schema": { + "type": "object", + "properties": { + "role": { + "const": "assistant" + }, + "thinking": { + "type": "string" + }, + "content": { + "type": "string" + }, + "tool_calls": { + "x-regex-iterator": "<\\|tool_call>(.*?)", + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "const": "function" + }, + "function": { + "type": "object", + "x-regex": "call\\:(?P\\w+)(?P\\{.*\\})", + "properties": { + "name": { + 
"type": "string" + }, + "arguments": { + "type": "object", + "x-parser": "gemma4-tool-call", + "additionalProperties": {} + } + } + } + } + } + } + }, + "x-regex": "(\\<\\|channel\\>thought\\n(?P.*?)\\)?(?P\\<\\|tool_call\\>.*\\)?(?P(?:(?!\\)(?!\\<\\|tool_response\\>).)+)?(?:\\|\\<\\|tool_response\\>)?" + }, + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "" +} diff --git a/mel_filterbank.bin b/mel_filterbank.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea2424596423068bc05fa92aba129c762f76cb8e --- /dev/null +++ b/mel_filterbank.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427860b9a9429175f0e450512def4224f46ced89960dfb1d9cf7479d7e485e2b +size 131584 diff --git a/model_config.json b/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef5ff6c86cbdc5bee360796fd8530719038e2f52 --- /dev/null +++ b/model_config.json @@ -0,0 +1,26 @@ +{ + "model_name": "gemma4-e4b", + "architecture": "gemma4", + "hidden_size": 2560, + "num_hidden_layers": 42, + "num_attention_heads": 8, + "num_key_value_heads": 2, + "head_dim": 256, + "global_head_dim": 512, + "vocab_size": 262144, + "context_length": 2048, + "sliding_window": 512, + "per_layer_dim": 256, + "num_layers": 42, + "embed_scale": 50.59644256269407, + "per_layer_embed_scale": 16.0, + "per_layer_model_projection_scale": 0.01976423537605237, + "per_layer_input_scale": 0.7071067811865475, + "rms_norm_eps": 1e-06, + "bos_token_id": 2, + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "quantization": "int4", + "compute_units": "CPU_AND_NE", + "tokenizer_repo": "google/gemma-4-E4B-it" +} \ No newline at end of file diff --git a/output_proj_bias.npy b/output_proj_bias.npy new file mode 100644 index 
0000000000000000000000000000000000000000..839b7c9c54c0777bbbd8f9a50523223b269150af --- /dev/null +++ b/output_proj_bias.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f564508f25bb143480d3dd9071c5deb8340f7c9b35582fefd55815ab355597 +size 3200 diff --git a/output_proj_weight.npy b/output_proj_weight.npy new file mode 100644 index 0000000000000000000000000000000000000000..d0a509337452653891a974a7a612a66fd0bffa69 --- /dev/null +++ b/output_proj_weight.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074449599eed804bc8fe0e83e96d535109c1f9f9a3c51df7c3a232d67502e285 +size 3145856 diff --git a/per_layer_norm_weight.bin b/per_layer_norm_weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c2e296eb70aa8677008db33c3db01be0e62c139 --- /dev/null +++ b/per_layer_norm_weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff67a6ed2e1ac597c28467653c4d67ecd4018f668b1d667af95e564539bd4c10 +size 512 diff --git a/per_layer_projection.bin b/per_layer_projection.bin new file mode 100644 index 0000000000000000000000000000000000000000..66e5e2170c01ffefd98d3404b48d49d3fb4380fd --- /dev/null +++ b/per_layer_projection.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8889e468a6c7e0a43f6ea4a7df18435cf75a5e496415146790f9e0a8a9cd63f +size 55050240 diff --git a/sin_full.npy b/sin_full.npy new file mode 100644 index 0000000000000000000000000000000000000000..1d5eb6ea4929fefb9bb3d459c2224d6772506c2c --- /dev/null +++ b/sin_full.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9d6aa40ca6b4f02f2be7563be801bc8ed77bcbc06f3aeb46050587401f2b4e +size 4194432 diff --git a/sin_sliding.npy b/sin_sliding.npy new file mode 100644 index 0000000000000000000000000000000000000000..30a46c37c40dea4cffd340d215d0253a8d8bb854 --- /dev/null +++ b/sin_sliding.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b8a996e1172dbb9748de65e1b7cede880e07f89310719215c627cf26faebcd55 +size 2097280 diff --git a/vision.ane.mlmodelc/analytics/coremldata.bin b/vision.ane.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c62398ed1dc87044069208da609bd1d9e672628 --- /dev/null +++ b/vision.ane.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfbfea0ab4e72567e783811aad1d436664ec9c6ecc2b1004406cab6a27b3b4b +size 243 diff --git a/vision.ane.mlmodelc/coremldata.bin b/vision.ane.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..d72155fb99f7cbb8da9bcd7de526a2e793e67163 --- /dev/null +++ b/vision.ane.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e5aeccbc809b1a933d9859e140e927272469e6714736580382e3b807d92d91 +size 426 diff --git a/vision.ane.mlmodelc/metadata.json b/vision.ane.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1795f749c3737fb8dad0a4112f5113acd82b78d3 --- /dev/null +++ b/vision.ane.mlmodelc/metadata.json @@ -0,0 +1,100 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 256 × 2560)", + "shortDescription" : "", + "shape" : "[1, 256, 2560]", + "name" : "image_features", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.expandDims" : 134, + "Ios18.mul" : 423, + "Ios18.cos" : 2, + "Ios18.softmax" : 16, + "Ios18.matmul" : 36, + "Ios18.floorDiv" : 2, + "Ios16.reduceMean" : 112, + "Ios18.logicalNot" : 1, + "Ios18.equal" : 1, + "Ios18.sin" : 2, + "Split" : 97, + "Select" : 2, + "Ios16.reduceMax" : 1, + "Ios16.reduceMin" : 1, + "Ios18.add" : 227, + "Ios16.reduceSum" : 1, + 
"Tile" : 1, + "Ios18.layerNorm" : 1, + "Ios18.reshape" : 64, + "Ios18.maximum" : 1, + "Ios18.linear" : 114, + "Ios18.concat" : 101, + "Ios18.transpose" : 67, + "OneHot" : 2, + "Ios18.sub" : 2, + "Ios18.cast" : 10, + "Ios18.pow" : 224, + "Ios18.clip" : 176, + "Ios18.gelu" : 16, + "Ios18.sliceByIndex" : 133 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.conversion_date" : "2026-04-30", + "com.github.apple.coremltools.source" : "torch==2.11.0", + "com.github.apple.coremltools.version" : "9.0", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 2304 × 768)", + "shortDescription" : "", + "shape" : "[1, 2304, 768]", + "name" : "pixel_values", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1 × 2304 × 2)", + "shortDescription" : "", + "shape" : "[1, 2304, 2]", + "name" : "pixel_position_ids", + "type" : "MultiArray" + } + ], + "generatedClassName" : "vision_ane", + "method" : "predict" + } +] \ No newline at end of file diff --git a/vision.ane.mlmodelc/model.mil b/vision.ane.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..a91c0e99dc32ab695a7a8a424680506a9512d479 --- /dev/null +++ b/vision.ane.mlmodelc/model.mil @@ -0,0 +1,4167 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", 
"9.0"}})] +{ + func main(tensor pixel_position_ids, tensor pixel_values) { + tensor model_vision_tower_patch_embedder_position_embedding_table = const()[name = string("model_vision_tower_patch_embedder_position_embedding_table"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor model_vision_tower_patch_embedder_input_proj_weight = const()[name = string("model_vision_tower_patch_embedder_input_proj_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31457408)))]; + int32 var_17 = const()[name = string("op_17"), val = int32(3)]; + int32 var_31 = const()[name = string("op_31"), val = int32(0)]; + int32 var_38 = const()[name = string("op_38"), val = int32(-1)]; + tensor var_47 = equal(x = pixel_position_ids, y = var_38)[name = string("op_47")]; + tensor var_38_list = const()[name = string("op_38_list"), val = tensor([-1])]; + string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("int32")]; + bool reduce_min_0_keep_dims_0 = const()[name = string("reduce_min_0_keep_dims_0"), val = bool(false)]; + tensor cast_1 = cast(dtype = cast_1_dtype_0, x = var_47)[name = string("cast_80")]; + tensor reduce_min_0 = reduce_min(axes = var_38_list, keep_dims = reduce_min_0_keep_dims_0, x = cast_1)[name = string("reduce_min_0")]; + string padding_positions_dtype_0 = const()[name = string("padding_positions_dtype_0"), val = string("bool")]; + fp16 var_51_to_fp16 = const()[name = string("op_51_to_fp16"), val = fp16(0x1p-1)]; + tensor var_52_cast_fp16 = sub(x = pixel_values, y = var_51_to_fp16)[name = string("op_52_cast_fp16")]; + fp16 var_53_promoted_to_fp16 = const()[name = string("op_53_promoted_to_fp16"), val = fp16(0x1p+1)]; + tensor pixel_values_cast_fp16 = mul(x = var_52_cast_fp16, y = var_53_promoted_to_fp16)[name = string("pixel_values_cast_fp16")]; + tensor linear_0_bias_0 = const()[name = string("linear_0_bias_0"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(32637120)))]; + tensor hidden_states_1 = linear(bias = linear_0_bias_0, weight = model_vision_tower_patch_embedder_input_proj_weight, x = pixel_values_cast_fp16)[name = string("linear_0")]; + tensor clamped_positions_1 = maximum(x = pixel_position_ids, y = var_31)[name = string("clamped_positions_1")]; + int32 one_hot_1_one_hot_vector_size_0 = const()[name = string("one_hot_1_one_hot_vector_size_0"), val = int32(10240)]; + int32 one_hot_1_axis_0 = const()[name = string("one_hot_1_axis_0"), val = int32(-1)]; + int32 one_hot_1_on_value_0 = const()[name = string("one_hot_1_on_value_0"), val = int32(1)]; + int32 one_hot_1_off_value_0 = const()[name = string("one_hot_1_off_value_0"), val = int32(0)]; + tensor one_hot_1 = one_hot(axis = one_hot_1_axis_0, indices = clamped_positions_1, off_value = one_hot_1_off_value_0, on_value = one_hot_1_on_value_0, one_hot_vector_size = one_hot_1_one_hot_vector_size_0)[name = string("one_hot_1")]; + tensor var_60 = const()[name = string("op_60"), val = tensor([0, 2, 1, 3])]; + string one_hot_dtype_0 = const()[name = string("one_hot_dtype_0"), val = string("fp16")]; + bool position_embeddings_1_transpose_x_0 = const()[name = string("position_embeddings_1_transpose_x_0"), val = bool(false)]; + bool position_embeddings_1_transpose_y_0 = const()[name = string("position_embeddings_1_transpose_y_0"), val = bool(false)]; + tensor var_61 = transpose(perm = var_60, x = one_hot_1)[name = string("transpose_162")]; + tensor one_hot = cast(dtype = one_hot_dtype_0, x = var_61)[name = string("cast_78")]; + tensor position_embeddings_1 = matmul(transpose_x = position_embeddings_1_transpose_x_0, transpose_y = position_embeddings_1_transpose_y_0, x = one_hot, y = model_vision_tower_patch_embedder_position_embedding_table)[name = string("position_embeddings_1")]; + tensor position_embeddings_3_axes_0 = const()[name = string("position_embeddings_3_axes_0"), val = tensor([1])]; + bool 
position_embeddings_3_keep_dims_0 = const()[name = string("position_embeddings_3_keep_dims_0"), val = bool(false)]; + tensor position_embeddings_3 = reduce_sum(axes = position_embeddings_3_axes_0, keep_dims = position_embeddings_3_keep_dims_0, x = position_embeddings_1)[name = string("position_embeddings_3")]; + tensor var_66_axes_0 = const()[name = string("op_66_axes_0"), val = tensor([-1])]; + tensor padding_positions = cast(dtype = padding_positions_dtype_0, x = reduce_min_0)[name = string("cast_79")]; + tensor var_66 = expand_dims(axes = var_66_axes_0, x = padding_positions)[name = string("op_66")]; + tensor var_66_after_broadcast_reps_0 = const()[name = string("op_66_after_broadcast_reps_0"), val = tensor([1, 1, 768])]; + tensor var_66_after_broadcast = tile(reps = var_66_after_broadcast_reps_0, x = var_66)[name = string("op_66_after_broadcast")]; + tensor var_36_after_broadcast_to_fp16 = const()[name = string("op_36_after_broadcast_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32638720)))]; + tensor position_embeddings_cast_fp16 = select(a = var_36_after_broadcast_to_fp16, b = position_embeddings_3, cond = var_66_after_broadcast)[name = string("position_embeddings_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = add(x = hidden_states_1, y = position_embeddings_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor attention_mask_1 = logical_not(x = padding_positions)[name = string("attention_mask_1")]; + string am_dtype_0 = const()[name = string("am_dtype_0"), val = string("fp16")]; + tensor var_104_axes_0 = const()[name = string("op_104_axes_0"), val = tensor([1])]; + tensor am = cast(dtype = am_dtype_0, x = attention_mask_1)[name = string("cast_77")]; + tensor var_104 = expand_dims(axes = var_104_axes_0, x = am)[name = string("op_104")]; + tensor row_axes_0 = const()[name = string("row_axes_0"), val = tensor([2])]; + tensor row = expand_dims(axes = row_axes_0, x = var_104)[name = 
string("row")]; + tensor col_axes_0 = const()[name = string("col_axes_0"), val = tensor([3])]; + tensor col = expand_dims(axes = col_axes_0, x = var_104)[name = string("col")]; + tensor var_108 = mul(x = row, y = col)[name = string("op_108")]; + fp16 var_21_to_fp16 = const()[name = string("op_21_to_fp16"), val = fp16(0x1p+0)]; + tensor var_109_cast_fp16 = sub(x = var_21_to_fp16, y = var_108)[name = string("op_109_cast_fp16")]; + fp16 var_110_to_fp16 = const()[name = string("op_110_to_fp16"), val = fp16(-0x1.ffcp+15)]; + tensor attention_mask_cast_fp16 = mul(x = var_109_cast_fp16, y = var_110_to_fp16)[name = string("attention_mask_cast_fp16")]; + tensor dim_position_ids_1_begin_0 = const()[name = string("dim_position_ids_1_begin_0"), val = tensor([0, 0, 0])]; + tensor dim_position_ids_1_end_0 = const()[name = string("dim_position_ids_1_end_0"), val = tensor([1, 2304, 1])]; + tensor dim_position_ids_1_end_mask_0 = const()[name = string("dim_position_ids_1_end_mask_0"), val = tensor([true, true, false])]; + tensor dim_position_ids_1_squeeze_mask_0 = const()[name = string("dim_position_ids_1_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor dim_position_ids_1 = slice_by_index(begin = dim_position_ids_1_begin_0, end = dim_position_ids_1_end_0, end_mask = dim_position_ids_1_end_mask_0, squeeze_mask = dim_position_ids_1_squeeze_mask_0, x = pixel_position_ids)[name = string("dim_position_ids_1")]; + tensor var_125_axes_0 = const()[name = string("op_125_axes_0"), val = tensor([1])]; + tensor var_125 = expand_dims(axes = var_125_axes_0, x = dim_position_ids_1)[name = string("op_125")]; + bool var_130_transpose_x_0 = const()[name = string("op_130_transpose_x_0"), val = bool(false)]; + bool var_130_transpose_y_0 = const()[name = string("op_130_transpose_y_0"), val = bool(false)]; + tensor const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36177728)))]; + string 
dim_position_ids_expanded_1_to_fp16_dtype_0 = const()[name = string("dim_position_ids_expanded_1_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_125_to_fp16 = cast(dtype = dim_position_ids_expanded_1_to_fp16_dtype_0, x = var_125)[name = string("cast_76")]; + tensor var_130_cast_fp16 = matmul(transpose_x = var_130_transpose_x_0, transpose_y = var_130_transpose_y_0, x = const_4_to_fp16, y = var_125_to_fp16)[name = string("op_130_cast_fp16")]; + tensor freqs_1_perm_0 = const()[name = string("freqs_1_perm_0"), val = tensor([0, 2, 1])]; + bool emb_1_interleave_0 = const()[name = string("emb_1_interleave_0"), val = bool(false)]; + tensor freqs_1_cast_fp16 = transpose(perm = freqs_1_perm_0, x = var_130_cast_fp16)[name = string("transpose_161")]; + tensor emb_1_cast_fp16 = concat(axis = var_38, interleave = emb_1_interleave_0, values = (freqs_1_cast_fp16, freqs_1_cast_fp16))[name = string("emb_1_cast_fp16")]; + tensor var_134_cast_fp16 = cos(x = emb_1_cast_fp16)[name = string("op_134_cast_fp16")]; + tensor var_137_cast_fp16 = sin(x = emb_1_cast_fp16)[name = string("op_137_cast_fp16")]; + tensor dim_position_ids_begin_0 = const()[name = string("dim_position_ids_begin_0"), val = tensor([0, 0, 1])]; + tensor dim_position_ids_end_0 = const()[name = string("dim_position_ids_end_0"), val = tensor([1, 2304, 2])]; + tensor dim_position_ids_end_mask_0 = const()[name = string("dim_position_ids_end_mask_0"), val = tensor([true, true, false])]; + tensor dim_position_ids_squeeze_mask_0 = const()[name = string("dim_position_ids_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor dim_position_ids = slice_by_index(begin = dim_position_ids_begin_0, end = dim_position_ids_end_0, end_mask = dim_position_ids_end_mask_0, squeeze_mask = dim_position_ids_squeeze_mask_0, x = pixel_position_ids)[name = string("dim_position_ids")]; + tensor var_144_axes_0 = const()[name = string("op_144_axes_0"), val = tensor([1])]; + tensor var_144 = expand_dims(axes = var_144_axes_0, x = 
dim_position_ids)[name = string("op_144")]; + bool var_149_transpose_x_0 = const()[name = string("op_149_transpose_x_0"), val = bool(false)]; + bool var_149_transpose_y_0 = const()[name = string("op_149_transpose_y_0"), val = bool(false)]; + tensor const_5_to_fp16 = const()[name = string("const_5_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36177856)))]; + string dim_position_ids_expanded_5_to_fp16_dtype_0 = const()[name = string("dim_position_ids_expanded_5_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_144_to_fp16 = cast(dtype = dim_position_ids_expanded_5_to_fp16_dtype_0, x = var_144)[name = string("cast_75")]; + tensor var_149_cast_fp16 = matmul(transpose_x = var_149_transpose_x_0, transpose_y = var_149_transpose_y_0, x = const_5_to_fp16, y = var_144_to_fp16)[name = string("op_149_cast_fp16")]; + tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; + bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; + tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_149_cast_fp16)[name = string("transpose_160")]; + tensor emb_cast_fp16 = concat(axis = var_38, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; + tensor var_153_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_153_cast_fp16")]; + tensor var_156_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_156_cast_fp16")]; + bool var_160_interleave_0 = const()[name = string("op_160_interleave_0"), val = bool(false)]; + tensor var_160_cast_fp16 = concat(axis = var_38, interleave = var_160_interleave_0, values = (var_134_cast_fp16, var_153_cast_fp16))[name = string("op_160_cast_fp16")]; + bool var_163_interleave_0 = const()[name = string("op_163_interleave_0"), val = bool(false)]; + tensor var_163_cast_fp16 = concat(axis = var_38, interleave = var_163_interleave_0, values = (var_137_cast_fp16, var_156_cast_fp16))[name = 
string("op_163_cast_fp16")]; + fp16 var_33_promoted_to_fp16 = const()[name = string("op_33_promoted_to_fp16"), val = fp16(0x1p+1)]; + tensor var_176_cast_fp16 = pow(x = hidden_states_3_cast_fp16, y = var_33_promoted_to_fp16)[name = string("op_176_cast_fp16")]; + tensor var_178_axes_0 = const()[name = string("op_178_axes_0"), val = tensor([-1])]; + bool var_178_keep_dims_0 = const()[name = string("op_178_keep_dims_0"), val = bool(true)]; + tensor var_178_cast_fp16 = reduce_mean(axes = var_178_axes_0, keep_dims = var_178_keep_dims_0, x = var_176_cast_fp16)[name = string("op_178_cast_fp16")]; + fp16 var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_1_cast_fp16 = add(x = var_178_cast_fp16, y = var_179_to_fp16)[name = string("mean_squared_1_cast_fp16")]; + fp16 var_27_to_fp16 = const()[name = string("op_27_to_fp16"), val = fp16(-0x1p-1)]; + tensor var_181_cast_fp16 = pow(x = mean_squared_1_cast_fp16, y = var_27_to_fp16)[name = string("op_181_cast_fp16")]; + tensor normed_output_1_cast_fp16 = mul(x = hidden_states_3_cast_fp16, y = var_181_cast_fp16)[name = string("normed_output_1_cast_fp16")]; + tensor const_6_to_fp16 = const()[name = string("const_6_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36177984)))]; + tensor normed_output_3_cast_fp16 = mul(x = normed_output_1_cast_fp16, y = const_6_to_fp16)[name = string("normed_output_3_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.98p+2)]; + fp16 model_vision_tower_encoder_layers_0_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.94p+2)]; + tensor clip_0_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_0_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_3_cast_fp16)[name = string("clip_0_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36179584)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_0_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_0_cast_fp16)[name = string("linear_1_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6ap+3)]; + fp16 model_vision_tower_encoder_layers_0_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.66p+3)]; + tensor clip_1_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_1_cast_fp16)[name = string("clip_1_cast_fp16")]; + tensor var_203 = const()[name = string("op_203"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_11_cast_fp16 = reshape(shape = var_203, x = clip_1_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + fp16 var_33_promoted_1_to_fp16 = const()[name = string("op_33_promoted_1_to_fp16"), val = fp16(0x1p+1)]; + tensor var_207_cast_fp16 = pow(x = hidden_states_11_cast_fp16, y = var_33_promoted_1_to_fp16)[name = string("op_207_cast_fp16")]; + tensor var_209_axes_0 = const()[name = string("op_209_axes_0"), val = 
tensor([-1])]; + bool var_209_keep_dims_0 = const()[name = string("op_209_keep_dims_0"), val = bool(true)]; + tensor var_209_cast_fp16 = reduce_mean(axes = var_209_axes_0, keep_dims = var_209_keep_dims_0, x = var_207_cast_fp16)[name = string("op_209_cast_fp16")]; + fp16 var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_3_cast_fp16 = add(x = var_209_cast_fp16, y = var_210_to_fp16)[name = string("mean_squared_3_cast_fp16")]; + tensor var_212_cast_fp16 = pow(x = mean_squared_3_cast_fp16, y = var_27_to_fp16)[name = string("op_212_cast_fp16")]; + tensor normed_output_5_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = var_212_cast_fp16)[name = string("normed_output_5_cast_fp16")]; + tensor const_9_to_fp16 = const()[name = string("const_9_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37359296)))]; + tensor normed_output_7_cast_fp16 = mul(x = normed_output_5_cast_fp16, y = const_9_to_fp16)[name = string("normed_output_7_cast_fp16")]; + tensor var_232 = const()[name = string("op_232"), val = tensor([32, 32])]; + int32 var_233_axis_0 = const()[name = string("op_233_axis_0"), val = int32(-1)]; + tensor var_233_cast_fp16_0, tensor var_233_cast_fp16_1 = split(axis = var_233_axis_0, split_sizes = var_232, x = normed_output_7_cast_fp16)[name = string("op_233_cast_fp16")]; + tensor var_236 = const()[name = string("op_236"), val = tensor([32, 32])]; + int32 var_237_axis_0 = const()[name = string("op_237_axis_0"), val = int32(-1)]; + tensor var_237_0, tensor var_237_1 = split(axis = var_237_axis_0, split_sizes = var_236, x = var_160_cast_fp16)[name = string("op_237")]; + tensor var_240 = const()[name = string("op_240"), val = tensor([32, 32])]; + int32 var_241_axis_0 = const()[name = string("op_241_axis_0"), val = int32(-1)]; + tensor var_241_0, tensor var_241_1 = split(axis = var_241_axis_0, split_sizes = var_240, x = var_163_cast_fp16)[name = string("op_241")]; + tensor 
cos_5_axes_0 = const()[name = string("cos_5_axes_0"), val = tensor([2])]; + tensor cos_5 = expand_dims(axes = cos_5_axes_0, x = var_237_0)[name = string("cos_5")]; + tensor sin_5_axes_0 = const()[name = string("sin_5_axes_0"), val = tensor([2])]; + tensor sin_5 = expand_dims(axes = sin_5_axes_0, x = var_241_0)[name = string("sin_5")]; + tensor var_246_cast_fp16 = mul(x = var_233_cast_fp16_0, y = cos_5)[name = string("op_246_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_233_cast_fp16_0)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_233_cast_fp16_0)[name = string("x2_1_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_257_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_257_cast_fp16")]; + bool var_259_interleave_0 = const()[name = string("op_259_interleave_0"), val = bool(false)]; + tensor var_259_cast_fp16 = concat(axis = var_38, interleave = var_259_interleave_0, values = (var_257_cast_fp16, x1_1_cast_fp16))[name = string("op_259_cast_fp16")]; + tensor var_260_cast_fp16 = mul(x = var_259_cast_fp16, y = sin_5)[name = string("op_260_cast_fp16")]; + tensor var_261_cast_fp16 = 
add(x = var_246_cast_fp16, y = var_260_cast_fp16)[name = string("op_261_cast_fp16")]; + tensor cos_9_axes_0 = const()[name = string("cos_9_axes_0"), val = tensor([2])]; + tensor cos_9 = expand_dims(axes = cos_9_axes_0, x = var_237_1)[name = string("cos_9")]; + tensor sin_9_axes_0 = const()[name = string("sin_9_axes_0"), val = tensor([2])]; + tensor sin_9 = expand_dims(axes = sin_9_axes_0, x = var_241_1)[name = string("sin_9")]; + tensor var_264_cast_fp16 = mul(x = var_233_cast_fp16_1, y = cos_9)[name = string("op_264_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_233_cast_fp16_1)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_233_cast_fp16_1)[name = string("x2_3_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_275_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_275_cast_fp16")]; + bool var_277_interleave_0 = const()[name = string("op_277_interleave_0"), val = bool(false)]; + tensor var_277_cast_fp16 = concat(axis = var_38, interleave = var_277_interleave_0, values = (var_275_cast_fp16, x1_3_cast_fp16))[name = string("op_277_cast_fp16")]; + tensor var_278_cast_fp16 = mul(x = 
var_277_cast_fp16, y = sin_9)[name = string("op_278_cast_fp16")]; + tensor var_279_cast_fp16 = add(x = var_264_cast_fp16, y = var_278_cast_fp16)[name = string("op_279_cast_fp16")]; + bool query_states_1_interleave_0 = const()[name = string("query_states_1_interleave_0"), val = bool(false)]; + tensor query_states_1_cast_fp16 = concat(axis = var_38, interleave = query_states_1_interleave_0, values = (var_261_cast_fp16, var_279_cast_fp16))[name = string("query_states_1_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37359488)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_0_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_0_cast_fp16)[name = string("linear_2_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.44p+3)]; + fp16 model_vision_tower_encoder_layers_0_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.42p+3)]; + tensor clip_3_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_2_cast_fp16)[name = string("clip_3_cast_fp16")]; + tensor var_292 = const()[name = string("op_292"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_17_cast_fp16 = reshape(shape = var_292, x = clip_3_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; + fp16 var_33_promoted_2_to_fp16 = const()[name = 
string("op_33_promoted_2_to_fp16"), val = fp16(0x1p+1)]; + tensor var_296_cast_fp16 = pow(x = hidden_states_17_cast_fp16, y = var_33_promoted_2_to_fp16)[name = string("op_296_cast_fp16")]; + tensor var_298_axes_0 = const()[name = string("op_298_axes_0"), val = tensor([-1])]; + bool var_298_keep_dims_0 = const()[name = string("op_298_keep_dims_0"), val = bool(true)]; + tensor var_298_cast_fp16 = reduce_mean(axes = var_298_axes_0, keep_dims = var_298_keep_dims_0, x = var_296_cast_fp16)[name = string("op_298_cast_fp16")]; + fp16 var_299_to_fp16 = const()[name = string("op_299_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_5_cast_fp16 = add(x = var_298_cast_fp16, y = var_299_to_fp16)[name = string("mean_squared_5_cast_fp16")]; + tensor var_301_cast_fp16 = pow(x = mean_squared_5_cast_fp16, y = var_27_to_fp16)[name = string("op_301_cast_fp16")]; + tensor normed_output_9_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = var_301_cast_fp16)[name = string("normed_output_9_cast_fp16")]; + tensor const_18_to_fp16 = const()[name = string("const_18_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38539200)))]; + tensor normed_output_11_cast_fp16 = mul(x = normed_output_9_cast_fp16, y = const_18_to_fp16)[name = string("normed_output_11_cast_fp16")]; + tensor var_321 = const()[name = string("op_321"), val = tensor([32, 32])]; + int32 var_322_axis_0 = const()[name = string("op_322_axis_0"), val = int32(-1)]; + tensor var_322_cast_fp16_0, tensor var_322_cast_fp16_1 = split(axis = var_322_axis_0, split_sizes = var_321, x = normed_output_11_cast_fp16)[name = string("op_322_cast_fp16")]; + tensor var_325 = const()[name = string("op_325"), val = tensor([32, 32])]; + int32 var_326_axis_0 = const()[name = string("op_326_axis_0"), val = int32(-1)]; + tensor var_326_0, tensor var_326_1 = split(axis = var_326_axis_0, split_sizes = var_325, x = var_160_cast_fp16)[name = string("op_326")]; + tensor var_329 = const()[name = 
string("op_329"), val = tensor([32, 32])]; + int32 var_330_axis_0 = const()[name = string("op_330_axis_0"), val = int32(-1)]; + tensor var_330_0, tensor var_330_1 = split(axis = var_330_axis_0, split_sizes = var_329, x = var_163_cast_fp16)[name = string("op_330")]; + tensor cos_13_axes_0 = const()[name = string("cos_13_axes_0"), val = tensor([2])]; + tensor cos_13 = expand_dims(axes = cos_13_axes_0, x = var_326_0)[name = string("cos_13")]; + tensor sin_13_axes_0 = const()[name = string("sin_13_axes_0"), val = tensor([2])]; + tensor sin_13 = expand_dims(axes = sin_13_axes_0, x = var_330_0)[name = string("sin_13")]; + tensor var_335_cast_fp16 = mul(x = var_322_cast_fp16_0, y = cos_13)[name = string("op_335_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_322_cast_fp16_0)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_322_cast_fp16_0)[name = string("x2_5_cast_fp16")]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_346_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_346_cast_fp16")]; + bool var_348_interleave_0 = const()[name = string("op_348_interleave_0"), val = bool(false)]; + tensor 
var_348_cast_fp16 = concat(axis = var_38, interleave = var_348_interleave_0, values = (var_346_cast_fp16, x1_5_cast_fp16))[name = string("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = var_348_cast_fp16, y = sin_13)[name = string("op_349_cast_fp16")]; + tensor var_350_cast_fp16 = add(x = var_335_cast_fp16, y = var_349_cast_fp16)[name = string("op_350_cast_fp16")]; + tensor cos_17_axes_0 = const()[name = string("cos_17_axes_0"), val = tensor([2])]; + tensor cos_17 = expand_dims(axes = cos_17_axes_0, x = var_326_1)[name = string("cos_17")]; + tensor sin_17_axes_0 = const()[name = string("sin_17_axes_0"), val = tensor([2])]; + tensor sin_17 = expand_dims(axes = sin_17_axes_0, x = var_330_1)[name = string("sin_17")]; + tensor var_353_cast_fp16 = mul(x = var_322_cast_fp16_1, y = cos_17)[name = string("op_353_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_322_cast_fp16_1)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_322_cast_fp16_1)[name = string("x2_7_cast_fp16")]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_364_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_364_cast_fp16")]; + 
bool var_366_interleave_0 = const()[name = string("op_366_interleave_0"), val = bool(false)]; + tensor var_366_cast_fp16 = concat(axis = var_38, interleave = var_366_interleave_0, values = (var_364_cast_fp16, x1_7_cast_fp16))[name = string("op_366_cast_fp16")]; + tensor var_367_cast_fp16 = mul(x = var_366_cast_fp16, y = sin_17)[name = string("op_367_cast_fp16")]; + tensor var_368_cast_fp16 = add(x = var_353_cast_fp16, y = var_367_cast_fp16)[name = string("op_368_cast_fp16")]; + bool key_states_1_interleave_0 = const()[name = string("key_states_1_interleave_0"), val = bool(false)]; + tensor key_states_1_cast_fp16 = concat(axis = var_38, interleave = key_states_1_interleave_0, values = (var_350_cast_fp16, var_368_cast_fp16))[name = string("key_states_1_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38539392)))]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_0_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_0_cast_fp16)[name = string("linear_3_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.44p+3)]; + fp16 model_vision_tower_encoder_layers_0_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.42p+3)]; + tensor clip_5_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_3_cast_fp16)[name = 
string("clip_5_cast_fp16")]; + tensor var_381 = const()[name = string("op_381"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_23_cast_fp16 = reshape(shape = var_381, x = clip_5_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + fp16 var_33_promoted_3_to_fp16 = const()[name = string("op_33_promoted_3_to_fp16"), val = fp16(0x1p+1)]; + tensor var_384_cast_fp16 = pow(x = hidden_states_23_cast_fp16, y = var_33_promoted_3_to_fp16)[name = string("op_384_cast_fp16")]; + tensor var_386_axes_0 = const()[name = string("op_386_axes_0"), val = tensor([-1])]; + bool var_386_keep_dims_0 = const()[name = string("op_386_keep_dims_0"), val = bool(true)]; + tensor var_386_cast_fp16 = reduce_mean(axes = var_386_axes_0, keep_dims = var_386_keep_dims_0, x = var_384_cast_fp16)[name = string("op_386_cast_fp16")]; + fp16 var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_7_cast_fp16 = add(x = var_386_cast_fp16, y = var_387_to_fp16)[name = string("mean_squared_7_cast_fp16")]; + tensor var_389_cast_fp16 = pow(x = mean_squared_7_cast_fp16, y = var_27_to_fp16)[name = string("op_389_cast_fp16")]; + tensor normed_output_13_cast_fp16 = mul(x = hidden_states_23_cast_fp16, y = var_389_cast_fp16)[name = string("normed_output_13_cast_fp16")]; + tensor hidden_states_29_perm_0 = const()[name = string("hidden_states_29_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(true)]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = key_states_1_cast_fp16)[name = string("transpose_157")]; + tensor transpose_64 = transpose(perm = 
transpose_64_perm_0, x = query_states_1_cast_fp16)[name = string("transpose_158")]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("matmul_0_cast_fp16")]; + tensor add_0_cast_fp16 = add(x = matmul_0_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_0_cast_fp16")]; + int32 softmax_0_axis_0 = const()[name = string("softmax_0_axis_0"), val = int32(-1)]; + tensor softmax_0_cast_fp16 = softmax(axis = softmax_0_axis_0, x = add_0_cast_fp16)[name = string("softmax_0_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor hidden_states_29_cast_fp16 = transpose(perm = hidden_states_29_perm_0, x = normed_output_13_cast_fp16)[name = string("transpose_159")]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = softmax_0_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_394_perm_0 = const()[name = string("op_394_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_396 = const()[name = string("op_396"), val = tensor([1, 2304, -1])]; + tensor var_394_cast_fp16 = transpose(perm = var_394_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_156")]; + tensor var_397_cast_fp16 = reshape(shape = var_396, x = var_394_cast_fp16)[name = string("op_397_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.cp+1)]; + fp16 model_vision_tower_encoder_layers_0_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_0_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.bep+1)]; + tensor clip_6_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_self_attn_o_proj_input_max_promoted_to_fp16, x = var_397_cast_fp16)[name = string("clip_6_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39719104)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_0_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_6_cast_fp16)[name = string("linear_4_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.56p+4)]; + fp16 model_vision_tower_encoder_layers_0_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.54p+4)]; + tensor clip_7_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_4_cast_fp16)[name = string("clip_7_cast_fp16")]; + fp16 var_33_promoted_4_to_fp16 = const()[name = string("op_33_promoted_4_to_fp16"), val = fp16(0x1p+1)]; + tensor var_410_cast_fp16 = pow(x = clip_7_cast_fp16, y = var_33_promoted_4_to_fp16)[name = string("op_410_cast_fp16")]; + tensor var_412_axes_0 = const()[name = string("op_412_axes_0"), val = tensor([-1])]; + bool var_412_keep_dims_0 = const()[name = 
string("op_412_keep_dims_0"), val = bool(true)]; + tensor var_412_cast_fp16 = reduce_mean(axes = var_412_axes_0, keep_dims = var_412_keep_dims_0, x = var_410_cast_fp16)[name = string("op_412_cast_fp16")]; + fp16 var_413_to_fp16 = const()[name = string("op_413_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_9_cast_fp16 = add(x = var_412_cast_fp16, y = var_413_to_fp16)[name = string("mean_squared_9_cast_fp16")]; + tensor var_415_cast_fp16 = pow(x = mean_squared_9_cast_fp16, y = var_27_to_fp16)[name = string("op_415_cast_fp16")]; + tensor normed_output_15_cast_fp16 = mul(x = clip_7_cast_fp16, y = var_415_cast_fp16)[name = string("normed_output_15_cast_fp16")]; + tensor const_27_to_fp16 = const()[name = string("const_27_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40898816)))]; + tensor normed_output_17_cast_fp16 = mul(x = normed_output_15_cast_fp16, y = const_27_to_fp16)[name = string("normed_output_17_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = normed_output_17_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + fp16 var_33_promoted_5_to_fp16 = const()[name = string("op_33_promoted_5_to_fp16"), val = fp16(0x1p+1)]; + tensor var_423_cast_fp16 = pow(x = hidden_states_41_cast_fp16, y = var_33_promoted_5_to_fp16)[name = string("op_423_cast_fp16")]; + tensor var_425_axes_0 = const()[name = string("op_425_axes_0"), val = tensor([-1])]; + bool var_425_keep_dims_0 = const()[name = string("op_425_keep_dims_0"), val = bool(true)]; + tensor var_425_cast_fp16 = reduce_mean(axes = var_425_axes_0, keep_dims = var_425_keep_dims_0, x = var_423_cast_fp16)[name = string("op_425_cast_fp16")]; + fp16 var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_11_cast_fp16 = add(x = var_425_cast_fp16, y = var_426_to_fp16)[name = string("mean_squared_11_cast_fp16")]; + tensor var_428_cast_fp16 = pow(x = mean_squared_11_cast_fp16, 
y = var_27_to_fp16)[name = string("op_428_cast_fp16")]; + tensor normed_output_19_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = var_428_cast_fp16)[name = string("normed_output_19_cast_fp16")]; + tensor const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40900416)))]; + tensor normed_output_21_cast_fp16 = mul(x = normed_output_19_cast_fp16, y = const_28_to_fp16)[name = string("normed_output_21_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.3ap+1)]; + fp16 model_vision_tower_encoder_layers_0_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.38p+1)]; + tensor clip_8_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_21_cast_fp16)[name = string("clip_8_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40902016)))]; + tensor linear_5_bias_0_to_fp16 = const()[name = string("linear_5_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45620672)))]; + tensor linear_5_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_0_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_8_cast_fp16)[name = string("linear_5_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_0_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.2p+2)]; + fp16 model_vision_tower_encoder_layers_0_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.1ep+2)]; + tensor clip_9_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_5_cast_fp16)[name = string("clip_9_cast_fp16")]; + string var_445_mode_0 = const()[name = string("op_445_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_445_cast_fp16 = gelu(mode = var_445_mode_0, x = clip_9_cast_fp16)[name = string("op_445_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45626880)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_0_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_8_cast_fp16)[name = string("linear_6_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.2p+2)]; + fp16 model_vision_tower_encoder_layers_0_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.1ep+2)]; + tensor clip_11_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_mlp_up_proj_output_min_promoted_to_fp16, beta = 
model_vision_tower_encoder_layers_0_mlp_up_proj_output_max_promoted_to_fp16, x = linear_6_cast_fp16)[name = string("clip_11_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = mul(x = var_445_cast_fp16, y = clip_11_cast_fp16)[name = string("hidden_states_51_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.88p+3)]; + fp16 model_vision_tower_encoder_layers_0_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.86p+3)]; + tensor clip_12_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_0_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_51_cast_fp16)[name = string("clip_12_cast_fp16")]; + tensor model_vision_tower_encoder_layers_0_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50345536)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_0_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_12_cast_fp16)[name = string("linear_7_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_0_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.4cp+3)]; + fp16 model_vision_tower_encoder_layers_0_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_0_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.48p+3)]; + tensor clip_13_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_0_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_0_mlp_down_proj_output_max_promoted_to_fp16, x = linear_7_cast_fp16)[name = string("clip_13_cast_fp16")]; + fp16 var_33_promoted_6_to_fp16 = const()[name = string("op_33_promoted_6_to_fp16"), val = fp16(0x1p+1)]; + tensor var_467_cast_fp16 = pow(x = clip_13_cast_fp16, y = var_33_promoted_6_to_fp16)[name = string("op_467_cast_fp16")]; + tensor var_469_axes_0 = const()[name = string("op_469_axes_0"), val = tensor([-1])]; + bool var_469_keep_dims_0 = const()[name = string("op_469_keep_dims_0"), val = bool(true)]; + tensor var_469_cast_fp16 = reduce_mean(axes = var_469_axes_0, keep_dims = var_469_keep_dims_0, x = var_467_cast_fp16)[name = string("op_469_cast_fp16")]; + fp16 var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_13_cast_fp16 = add(x = var_469_cast_fp16, y = var_470_to_fp16)[name = string("mean_squared_13_cast_fp16")]; + tensor var_472_cast_fp16 = pow(x = mean_squared_13_cast_fp16, y = var_27_to_fp16)[name = string("op_472_cast_fp16")]; + tensor normed_output_23_cast_fp16 = mul(x = clip_13_cast_fp16, y = var_472_cast_fp16)[name = string("normed_output_23_cast_fp16")]; + tensor const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55064192)))]; + tensor normed_output_25_cast_fp16 = mul(x = normed_output_23_cast_fp16, y = const_29_to_fp16)[name = string("normed_output_25_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = normed_output_25_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + fp16 var_33_promoted_7_to_fp16 = const()[name = string("op_33_promoted_7_to_fp16"), val = fp16(0x1p+1)]; + tensor var_486_cast_fp16 = pow(x = hidden_states_61_cast_fp16, y = var_33_promoted_7_to_fp16)[name = string("op_486_cast_fp16")]; + tensor var_488_axes_0 = 
const()[name = string("op_488_axes_0"), val = tensor([-1])]; + bool var_488_keep_dims_0 = const()[name = string("op_488_keep_dims_0"), val = bool(true)]; + tensor var_488_cast_fp16 = reduce_mean(axes = var_488_axes_0, keep_dims = var_488_keep_dims_0, x = var_486_cast_fp16)[name = string("op_488_cast_fp16")]; + fp16 var_489_to_fp16 = const()[name = string("op_489_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_15_cast_fp16 = add(x = var_488_cast_fp16, y = var_489_to_fp16)[name = string("mean_squared_15_cast_fp16")]; + tensor var_491_cast_fp16 = pow(x = mean_squared_15_cast_fp16, y = var_27_to_fp16)[name = string("op_491_cast_fp16")]; + tensor normed_output_27_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = var_491_cast_fp16)[name = string("normed_output_27_cast_fp16")]; + tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55065792)))]; + tensor normed_output_29_cast_fp16 = mul(x = normed_output_27_cast_fp16, y = const_30_to_fp16)[name = string("normed_output_29_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.64p+2)]; + fp16 model_vision_tower_encoder_layers_1_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.62p+2)]; + tensor clip_14_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_29_cast_fp16)[name = string("clip_14_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_1_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55067392)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_1_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_14_cast_fp16)[name = string("linear_8_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.8ep+3)]; + fp16 model_vision_tower_encoder_layers_1_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.8cp+3)]; + tensor clip_15_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_8_cast_fp16)[name = string("clip_15_cast_fp16")]; + tensor var_513 = const()[name = string("op_513"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_69_cast_fp16 = reshape(shape = var_513, x = clip_15_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + fp16 var_33_promoted_8_to_fp16 = const()[name = string("op_33_promoted_8_to_fp16"), val = fp16(0x1p+1)]; + tensor var_517_cast_fp16 = pow(x = hidden_states_69_cast_fp16, y = var_33_promoted_8_to_fp16)[name = string("op_517_cast_fp16")]; + tensor var_519_axes_0 = const()[name = string("op_519_axes_0"), val = tensor([-1])]; + bool var_519_keep_dims_0 = const()[name = string("op_519_keep_dims_0"), val = bool(true)]; + tensor var_519_cast_fp16 = reduce_mean(axes = var_519_axes_0, keep_dims = var_519_keep_dims_0, x = var_517_cast_fp16)[name = string("op_519_cast_fp16")]; + fp16 var_520_to_fp16 = const()[name = string("op_520_to_fp16"), val = 
fp16(0x1.1p-20)]; + tensor mean_squared_17_cast_fp16 = add(x = var_519_cast_fp16, y = var_520_to_fp16)[name = string("mean_squared_17_cast_fp16")]; + tensor var_522_cast_fp16 = pow(x = mean_squared_17_cast_fp16, y = var_27_to_fp16)[name = string("op_522_cast_fp16")]; + tensor normed_output_31_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = var_522_cast_fp16)[name = string("normed_output_31_cast_fp16")]; + tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56247104)))]; + tensor normed_output_33_cast_fp16 = mul(x = normed_output_31_cast_fp16, y = const_33_to_fp16)[name = string("normed_output_33_cast_fp16")]; + tensor var_542 = const()[name = string("op_542"), val = tensor([32, 32])]; + int32 var_543_axis_0 = const()[name = string("op_543_axis_0"), val = int32(-1)]; + tensor var_543_cast_fp16_0, tensor var_543_cast_fp16_1 = split(axis = var_543_axis_0, split_sizes = var_542, x = normed_output_33_cast_fp16)[name = string("op_543_cast_fp16")]; + tensor var_546 = const()[name = string("op_546"), val = tensor([32, 32])]; + int32 var_547_axis_0 = const()[name = string("op_547_axis_0"), val = int32(-1)]; + tensor var_547_0, tensor var_547_1 = split(axis = var_547_axis_0, split_sizes = var_546, x = var_160_cast_fp16)[name = string("op_547")]; + tensor var_550 = const()[name = string("op_550"), val = tensor([32, 32])]; + int32 var_551_axis_0 = const()[name = string("op_551_axis_0"), val = int32(-1)]; + tensor var_551_0, tensor var_551_1 = split(axis = var_551_axis_0, split_sizes = var_550, x = var_163_cast_fp16)[name = string("op_551")]; + tensor cos_21_axes_0 = const()[name = string("cos_21_axes_0"), val = tensor([2])]; + tensor cos_21 = expand_dims(axes = cos_21_axes_0, x = var_547_0)[name = string("cos_21")]; + tensor sin_21_axes_0 = const()[name = string("sin_21_axes_0"), val = tensor([2])]; + tensor sin_21 = expand_dims(axes = sin_21_axes_0, x = var_551_0)[name 
= string("sin_21")]; + tensor var_556_cast_fp16 = mul(x = var_543_cast_fp16_0, y = cos_21)[name = string("op_556_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_543_cast_fp16_0)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_543_cast_fp16_0)[name = string("x2_9_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_567_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_567_cast_fp16")]; + bool var_569_interleave_0 = const()[name = string("op_569_interleave_0"), val = bool(false)]; + tensor var_569_cast_fp16 = concat(axis = var_38, interleave = var_569_interleave_0, values = (var_567_cast_fp16, x1_9_cast_fp16))[name = string("op_569_cast_fp16")]; + tensor var_570_cast_fp16 = mul(x = var_569_cast_fp16, y = sin_21)[name = string("op_570_cast_fp16")]; + tensor var_571_cast_fp16 = add(x = var_556_cast_fp16, y = var_570_cast_fp16)[name = string("op_571_cast_fp16")]; + tensor cos_25_axes_0 = const()[name = string("cos_25_axes_0"), val = tensor([2])]; + tensor cos_25 = expand_dims(axes = cos_25_axes_0, x = var_547_1)[name = string("cos_25")]; + tensor sin_25_axes_0 = const()[name = 
string("sin_25_axes_0"), val = tensor([2])]; + tensor sin_25 = expand_dims(axes = sin_25_axes_0, x = var_551_1)[name = string("sin_25")]; + tensor var_574_cast_fp16 = mul(x = var_543_cast_fp16_1, y = cos_25)[name = string("op_574_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_543_cast_fp16_1)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_543_cast_fp16_1)[name = string("x2_11_cast_fp16")]; + fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_585_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_585_cast_fp16")]; + bool var_587_interleave_0 = const()[name = string("op_587_interleave_0"), val = bool(false)]; + tensor var_587_cast_fp16 = concat(axis = var_38, interleave = var_587_interleave_0, values = (var_585_cast_fp16, x1_11_cast_fp16))[name = string("op_587_cast_fp16")]; + tensor var_588_cast_fp16 = mul(x = var_587_cast_fp16, y = sin_25)[name = string("op_588_cast_fp16")]; + tensor var_589_cast_fp16 = add(x = var_574_cast_fp16, y = var_588_cast_fp16)[name = string("op_589_cast_fp16")]; + bool query_states_3_interleave_0 = const()[name = string("query_states_3_interleave_0"), 
val = bool(false)]; + tensor query_states_3_cast_fp16 = concat(axis = var_38, interleave = query_states_3_interleave_0, values = (var_571_cast_fp16, var_589_cast_fp16))[name = string("query_states_3_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56247296)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_1_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_14_cast_fp16)[name = string("linear_9_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.66p+3)]; + fp16 model_vision_tower_encoder_layers_1_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.64p+3)]; + tensor clip_17_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_9_cast_fp16)[name = string("clip_17_cast_fp16")]; + tensor var_602 = const()[name = string("op_602"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_75_cast_fp16 = reshape(shape = var_602, x = clip_17_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; + fp16 var_33_promoted_9_to_fp16 = const()[name = string("op_33_promoted_9_to_fp16"), val = fp16(0x1p+1)]; + tensor var_606_cast_fp16 = pow(x = hidden_states_75_cast_fp16, y = var_33_promoted_9_to_fp16)[name = string("op_606_cast_fp16")]; + tensor var_608_axes_0 = const()[name = string("op_608_axes_0"), val = tensor([-1])]; + 
bool var_608_keep_dims_0 = const()[name = string("op_608_keep_dims_0"), val = bool(true)]; + tensor var_608_cast_fp16 = reduce_mean(axes = var_608_axes_0, keep_dims = var_608_keep_dims_0, x = var_606_cast_fp16)[name = string("op_608_cast_fp16")]; + fp16 var_609_to_fp16 = const()[name = string("op_609_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_19_cast_fp16 = add(x = var_608_cast_fp16, y = var_609_to_fp16)[name = string("mean_squared_19_cast_fp16")]; + tensor var_611_cast_fp16 = pow(x = mean_squared_19_cast_fp16, y = var_27_to_fp16)[name = string("op_611_cast_fp16")]; + tensor normed_output_35_cast_fp16 = mul(x = hidden_states_75_cast_fp16, y = var_611_cast_fp16)[name = string("normed_output_35_cast_fp16")]; + tensor const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57427008)))]; + tensor normed_output_37_cast_fp16 = mul(x = normed_output_35_cast_fp16, y = const_42_to_fp16)[name = string("normed_output_37_cast_fp16")]; + tensor var_631 = const()[name = string("op_631"), val = tensor([32, 32])]; + int32 var_632_axis_0 = const()[name = string("op_632_axis_0"), val = int32(-1)]; + tensor var_632_cast_fp16_0, tensor var_632_cast_fp16_1 = split(axis = var_632_axis_0, split_sizes = var_631, x = normed_output_37_cast_fp16)[name = string("op_632_cast_fp16")]; + tensor var_635 = const()[name = string("op_635"), val = tensor([32, 32])]; + int32 var_636_axis_0 = const()[name = string("op_636_axis_0"), val = int32(-1)]; + tensor var_636_0, tensor var_636_1 = split(axis = var_636_axis_0, split_sizes = var_635, x = var_160_cast_fp16)[name = string("op_636")]; + tensor var_639 = const()[name = string("op_639"), val = tensor([32, 32])]; + int32 var_640_axis_0 = const()[name = string("op_640_axis_0"), val = int32(-1)]; + tensor var_640_0, tensor var_640_1 = split(axis = var_640_axis_0, split_sizes = var_639, x = var_163_cast_fp16)[name = string("op_640")]; + tensor 
cos_29_axes_0 = const()[name = string("cos_29_axes_0"), val = tensor([2])]; + tensor cos_29 = expand_dims(axes = cos_29_axes_0, x = var_636_0)[name = string("cos_29")]; + tensor sin_29_axes_0 = const()[name = string("sin_29_axes_0"), val = tensor([2])]; + tensor sin_29 = expand_dims(axes = sin_29_axes_0, x = var_640_0)[name = string("sin_29")]; + tensor var_645_cast_fp16 = mul(x = var_632_cast_fp16_0, y = cos_29)[name = string("op_645_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_632_cast_fp16_0)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_632_cast_fp16_0)[name = string("x2_13_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_656_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_656_cast_fp16")]; + bool var_658_interleave_0 = const()[name = string("op_658_interleave_0"), val = bool(false)]; + tensor var_658_cast_fp16 = concat(axis = var_38, interleave = var_658_interleave_0, values = (var_656_cast_fp16, x1_13_cast_fp16))[name = string("op_658_cast_fp16")]; + tensor var_659_cast_fp16 = mul(x = var_658_cast_fp16, y = sin_29)[name = 
string("op_659_cast_fp16")]; + tensor var_660_cast_fp16 = add(x = var_645_cast_fp16, y = var_659_cast_fp16)[name = string("op_660_cast_fp16")]; + tensor cos_33_axes_0 = const()[name = string("cos_33_axes_0"), val = tensor([2])]; + tensor cos_33 = expand_dims(axes = cos_33_axes_0, x = var_636_1)[name = string("cos_33")]; + tensor sin_33_axes_0 = const()[name = string("sin_33_axes_0"), val = tensor([2])]; + tensor sin_33 = expand_dims(axes = sin_33_axes_0, x = var_640_1)[name = string("sin_33")]; + tensor var_663_cast_fp16 = mul(x = var_632_cast_fp16_1, y = cos_33)[name = string("op_663_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_632_cast_fp16_1)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_632_cast_fp16_1)[name = string("x2_15_cast_fp16")]; + fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_674_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_674_cast_fp16")]; + bool var_676_interleave_0 = const()[name = string("op_676_interleave_0"), val = bool(false)]; + tensor var_676_cast_fp16 = concat(axis = var_38, interleave = var_676_interleave_0, values = (var_674_cast_fp16, 
x1_15_cast_fp16))[name = string("op_676_cast_fp16")]; + tensor var_677_cast_fp16 = mul(x = var_676_cast_fp16, y = sin_33)[name = string("op_677_cast_fp16")]; + tensor var_678_cast_fp16 = add(x = var_663_cast_fp16, y = var_677_cast_fp16)[name = string("op_678_cast_fp16")]; + bool key_states_3_interleave_0 = const()[name = string("key_states_3_interleave_0"), val = bool(false)]; + tensor key_states_3_cast_fp16 = concat(axis = var_38, interleave = key_states_3_interleave_0, values = (var_660_cast_fp16, var_678_cast_fp16))[name = string("key_states_3_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57427200)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_1_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_14_cast_fp16)[name = string("linear_10_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.66p+3)]; + fp16 model_vision_tower_encoder_layers_1_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.64p+3)]; + tensor clip_19_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_10_cast_fp16)[name = string("clip_19_cast_fp16")]; + tensor var_691 = const()[name = string("op_691"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_81_cast_fp16 = reshape(shape = var_691, x = clip_19_cast_fp16)[name = 
string("hidden_states_81_cast_fp16")]; + fp16 var_33_promoted_10_to_fp16 = const()[name = string("op_33_promoted_10_to_fp16"), val = fp16(0x1p+1)]; + tensor var_694_cast_fp16 = pow(x = hidden_states_81_cast_fp16, y = var_33_promoted_10_to_fp16)[name = string("op_694_cast_fp16")]; + tensor var_696_axes_0 = const()[name = string("op_696_axes_0"), val = tensor([-1])]; + bool var_696_keep_dims_0 = const()[name = string("op_696_keep_dims_0"), val = bool(true)]; + tensor var_696_cast_fp16 = reduce_mean(axes = var_696_axes_0, keep_dims = var_696_keep_dims_0, x = var_694_cast_fp16)[name = string("op_696_cast_fp16")]; + fp16 var_697_to_fp16 = const()[name = string("op_697_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_21_cast_fp16 = add(x = var_696_cast_fp16, y = var_697_to_fp16)[name = string("mean_squared_21_cast_fp16")]; + tensor var_699_cast_fp16 = pow(x = mean_squared_21_cast_fp16, y = var_27_to_fp16)[name = string("op_699_cast_fp16")]; + tensor normed_output_39_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = var_699_cast_fp16)[name = string("normed_output_39_cast_fp16")]; + tensor hidden_states_87_perm_0 = const()[name = string("hidden_states_87_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(true)]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = key_states_3_cast_fp16)[name = string("transpose_153")]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = query_states_3_cast_fp16)[name = string("transpose_154")]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = 
transpose_66, y = transpose_67)[name = string("matmul_1_cast_fp16")]; + tensor add_1_cast_fp16 = add(x = matmul_1_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_1_cast_fp16")]; + int32 softmax_1_axis_0 = const()[name = string("softmax_1_axis_0"), val = int32(-1)]; + tensor softmax_1_cast_fp16 = softmax(axis = softmax_1_axis_0, x = add_1_cast_fp16)[name = string("softmax_1_cast_fp16")]; + bool attn_output_5_transpose_x_0 = const()[name = string("attn_output_5_transpose_x_0"), val = bool(false)]; + bool attn_output_5_transpose_y_0 = const()[name = string("attn_output_5_transpose_y_0"), val = bool(false)]; + tensor hidden_states_87_cast_fp16 = transpose(perm = hidden_states_87_perm_0, x = normed_output_39_cast_fp16)[name = string("transpose_155")]; + tensor attn_output_5_cast_fp16 = matmul(transpose_x = attn_output_5_transpose_x_0, transpose_y = attn_output_5_transpose_y_0, x = softmax_1_cast_fp16, y = hidden_states_87_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_704_perm_0 = const()[name = string("op_704_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_706 = const()[name = string("op_706"), val = tensor([1, 2304, -1])]; + tensor var_704_cast_fp16 = transpose(perm = var_704_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_152")]; + tensor var_707_cast_fp16 = reshape(shape = var_706, x = var_704_cast_fp16)[name = string("op_707_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.92p+1)]; + fp16 model_vision_tower_encoder_layers_1_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.8ep+1)]; + tensor clip_20_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_self_attn_o_proj_input_min_promoted_to_fp16, beta = 
model_vision_tower_encoder_layers_1_self_attn_o_proj_input_max_promoted_to_fp16, x = var_707_cast_fp16)[name = string("clip_20_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58606912)))]; + tensor linear_11_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_1_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_20_cast_fp16)[name = string("linear_11_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e8p+3)]; + fp16 model_vision_tower_encoder_layers_1_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.e4p+3)]; + tensor clip_21_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_11_cast_fp16)[name = string("clip_21_cast_fp16")]; + fp16 var_33_promoted_11_to_fp16 = const()[name = string("op_33_promoted_11_to_fp16"), val = fp16(0x1p+1)]; + tensor var_720_cast_fp16 = pow(x = clip_21_cast_fp16, y = var_33_promoted_11_to_fp16)[name = string("op_720_cast_fp16")]; + tensor var_722_axes_0 = const()[name = string("op_722_axes_0"), val = tensor([-1])]; + bool var_722_keep_dims_0 = const()[name = string("op_722_keep_dims_0"), val = bool(true)]; + tensor var_722_cast_fp16 = reduce_mean(axes = var_722_axes_0, keep_dims = var_722_keep_dims_0, x = var_720_cast_fp16)[name = string("op_722_cast_fp16")]; + fp16 var_723_to_fp16 = const()[name = 
string("op_723_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_23_cast_fp16 = add(x = var_722_cast_fp16, y = var_723_to_fp16)[name = string("mean_squared_23_cast_fp16")]; + tensor var_725_cast_fp16 = pow(x = mean_squared_23_cast_fp16, y = var_27_to_fp16)[name = string("op_725_cast_fp16")]; + tensor normed_output_41_cast_fp16 = mul(x = clip_21_cast_fp16, y = var_725_cast_fp16)[name = string("normed_output_41_cast_fp16")]; + tensor const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59786624)))]; + tensor normed_output_43_cast_fp16 = mul(x = normed_output_41_cast_fp16, y = const_51_to_fp16)[name = string("normed_output_43_cast_fp16")]; + tensor hidden_states_99_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = normed_output_43_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; + fp16 var_33_promoted_12_to_fp16 = const()[name = string("op_33_promoted_12_to_fp16"), val = fp16(0x1p+1)]; + tensor var_733_cast_fp16 = pow(x = hidden_states_99_cast_fp16, y = var_33_promoted_12_to_fp16)[name = string("op_733_cast_fp16")]; + tensor var_735_axes_0 = const()[name = string("op_735_axes_0"), val = tensor([-1])]; + bool var_735_keep_dims_0 = const()[name = string("op_735_keep_dims_0"), val = bool(true)]; + tensor var_735_cast_fp16 = reduce_mean(axes = var_735_axes_0, keep_dims = var_735_keep_dims_0, x = var_733_cast_fp16)[name = string("op_735_cast_fp16")]; + fp16 var_736_to_fp16 = const()[name = string("op_736_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_25_cast_fp16 = add(x = var_735_cast_fp16, y = var_736_to_fp16)[name = string("mean_squared_25_cast_fp16")]; + tensor var_738_cast_fp16 = pow(x = mean_squared_25_cast_fp16, y = var_27_to_fp16)[name = string("op_738_cast_fp16")]; + tensor normed_output_45_cast_fp16 = mul(x = hidden_states_99_cast_fp16, y = var_738_cast_fp16)[name = string("normed_output_45_cast_fp16")]; + tensor const_52_to_fp16 = 
const()[name = string("const_52_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59788224)))]; + tensor normed_output_47_cast_fp16 = mul(x = normed_output_45_cast_fp16, y = const_52_to_fp16)[name = string("normed_output_47_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.16p+2)]; + fp16 model_vision_tower_encoder_layers_1_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.14p+2)]; + tensor clip_22_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_47_cast_fp16)[name = string("clip_22_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59789824)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_1_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_22_cast_fp16)[name = string("linear_12_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.82p+2)]; + fp16 model_vision_tower_encoder_layers_1_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.7ep+2)]; + tensor clip_23_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_1_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_12_cast_fp16)[name = string("clip_23_cast_fp16")]; + string var_755_mode_0 = const()[name = string("op_755_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_755_cast_fp16 = gelu(mode = var_755_mode_0, x = clip_23_cast_fp16)[name = string("op_755_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64508480)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_1_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_22_cast_fp16)[name = string("linear_13_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.82p+2)]; + fp16 model_vision_tower_encoder_layers_1_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.7ep+2)]; + tensor clip_25_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_mlp_up_proj_output_max_promoted_to_fp16, x = linear_13_cast_fp16)[name = string("clip_25_cast_fp16")]; + tensor hidden_states_109_cast_fp16 = mul(x = var_755_cast_fp16, y = clip_25_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_down_proj_input_min_promoted_to_fp16"), val = 
fp16(-0x1.e4p+4)]; + fp16 model_vision_tower_encoder_layers_1_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.ep+4)]; + tensor clip_26_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_109_cast_fp16)[name = string("clip_26_cast_fp16")]; + tensor model_vision_tower_encoder_layers_1_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69227136)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_1_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_26_cast_fp16)[name = string("linear_14_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_1_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.26p+4)]; + fp16 model_vision_tower_encoder_layers_1_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_1_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.24p+4)]; + tensor clip_27_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_1_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_1_mlp_down_proj_output_max_promoted_to_fp16, x = linear_14_cast_fp16)[name = string("clip_27_cast_fp16")]; + fp16 var_33_promoted_13_to_fp16 = const()[name = string("op_33_promoted_13_to_fp16"), val = fp16(0x1p+1)]; + tensor var_777_cast_fp16 = pow(x = clip_27_cast_fp16, y = var_33_promoted_13_to_fp16)[name = string("op_777_cast_fp16")]; + tensor var_779_axes_0 = const()[name = 
string("op_779_axes_0"), val = tensor([-1])]; + bool var_779_keep_dims_0 = const()[name = string("op_779_keep_dims_0"), val = bool(true)]; + tensor var_779_cast_fp16 = reduce_mean(axes = var_779_axes_0, keep_dims = var_779_keep_dims_0, x = var_777_cast_fp16)[name = string("op_779_cast_fp16")]; + fp16 var_780_to_fp16 = const()[name = string("op_780_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_27_cast_fp16 = add(x = var_779_cast_fp16, y = var_780_to_fp16)[name = string("mean_squared_27_cast_fp16")]; + tensor var_782_cast_fp16 = pow(x = mean_squared_27_cast_fp16, y = var_27_to_fp16)[name = string("op_782_cast_fp16")]; + tensor normed_output_49_cast_fp16 = mul(x = clip_27_cast_fp16, y = var_782_cast_fp16)[name = string("normed_output_49_cast_fp16")]; + tensor const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73945792)))]; + tensor normed_output_51_cast_fp16 = mul(x = normed_output_49_cast_fp16, y = const_53_to_fp16)[name = string("normed_output_51_cast_fp16")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_99_cast_fp16, y = normed_output_51_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + fp16 var_33_promoted_14_to_fp16 = const()[name = string("op_33_promoted_14_to_fp16"), val = fp16(0x1p+1)]; + tensor var_796_cast_fp16 = pow(x = hidden_states_119_cast_fp16, y = var_33_promoted_14_to_fp16)[name = string("op_796_cast_fp16")]; + tensor var_798_axes_0 = const()[name = string("op_798_axes_0"), val = tensor([-1])]; + bool var_798_keep_dims_0 = const()[name = string("op_798_keep_dims_0"), val = bool(true)]; + tensor var_798_cast_fp16 = reduce_mean(axes = var_798_axes_0, keep_dims = var_798_keep_dims_0, x = var_796_cast_fp16)[name = string("op_798_cast_fp16")]; + fp16 var_799_to_fp16 = const()[name = string("op_799_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_29_cast_fp16 = add(x = var_798_cast_fp16, y = var_799_to_fp16)[name = 
string("mean_squared_29_cast_fp16")]; + tensor var_801_cast_fp16 = pow(x = mean_squared_29_cast_fp16, y = var_27_to_fp16)[name = string("op_801_cast_fp16")]; + tensor normed_output_53_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = var_801_cast_fp16)[name = string("normed_output_53_cast_fp16")]; + tensor const_54_to_fp16 = const()[name = string("const_54_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73947392)))]; + tensor normed_output_55_cast_fp16 = mul(x = normed_output_53_cast_fp16, y = const_54_to_fp16)[name = string("normed_output_55_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.84p+3)]; + fp16 model_vision_tower_encoder_layers_2_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.82p+3)]; + tensor clip_28_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_55_cast_fp16)[name = string("clip_28_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73948992)))]; + tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_2_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_28_cast_fp16)[name = string("linear_15_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_2_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.72p+4)]; + fp16 model_vision_tower_encoder_layers_2_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.6ep+4)]; + tensor clip_29_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_15_cast_fp16)[name = string("clip_29_cast_fp16")]; + tensor var_823 = const()[name = string("op_823"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_127_cast_fp16 = reshape(shape = var_823, x = clip_29_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + fp16 var_33_promoted_15_to_fp16 = const()[name = string("op_33_promoted_15_to_fp16"), val = fp16(0x1p+1)]; + tensor var_827_cast_fp16 = pow(x = hidden_states_127_cast_fp16, y = var_33_promoted_15_to_fp16)[name = string("op_827_cast_fp16")]; + tensor var_829_axes_0 = const()[name = string("op_829_axes_0"), val = tensor([-1])]; + bool var_829_keep_dims_0 = const()[name = string("op_829_keep_dims_0"), val = bool(true)]; + tensor var_829_cast_fp16 = reduce_mean(axes = var_829_axes_0, keep_dims = var_829_keep_dims_0, x = var_827_cast_fp16)[name = string("op_829_cast_fp16")]; + fp16 var_830_to_fp16 = const()[name = string("op_830_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_31_cast_fp16 = add(x = var_829_cast_fp16, y = var_830_to_fp16)[name = string("mean_squared_31_cast_fp16")]; + tensor var_832_cast_fp16 = pow(x = mean_squared_31_cast_fp16, y = var_27_to_fp16)[name = string("op_832_cast_fp16")]; + tensor normed_output_57_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = var_832_cast_fp16)[name = string("normed_output_57_cast_fp16")]; + tensor const_57_to_fp16 = const()[name = string("const_57_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(75128704)))]; + tensor normed_output_59_cast_fp16 = mul(x = normed_output_57_cast_fp16, y = const_57_to_fp16)[name = string("normed_output_59_cast_fp16")]; + tensor var_852 = const()[name = string("op_852"), val = tensor([32, 32])]; + int32 var_853_axis_0 = const()[name = string("op_853_axis_0"), val = int32(-1)]; + tensor var_853_cast_fp16_0, tensor var_853_cast_fp16_1 = split(axis = var_853_axis_0, split_sizes = var_852, x = normed_output_59_cast_fp16)[name = string("op_853_cast_fp16")]; + tensor var_856 = const()[name = string("op_856"), val = tensor([32, 32])]; + int32 var_857_axis_0 = const()[name = string("op_857_axis_0"), val = int32(-1)]; + tensor var_857_0, tensor var_857_1 = split(axis = var_857_axis_0, split_sizes = var_856, x = var_160_cast_fp16)[name = string("op_857")]; + tensor var_860 = const()[name = string("op_860"), val = tensor([32, 32])]; + int32 var_861_axis_0 = const()[name = string("op_861_axis_0"), val = int32(-1)]; + tensor var_861_0, tensor var_861_1 = split(axis = var_861_axis_0, split_sizes = var_860, x = var_163_cast_fp16)[name = string("op_861")]; + tensor cos_37_axes_0 = const()[name = string("cos_37_axes_0"), val = tensor([2])]; + tensor cos_37 = expand_dims(axes = cos_37_axes_0, x = var_857_0)[name = string("cos_37")]; + tensor sin_37_axes_0 = const()[name = string("sin_37_axes_0"), val = tensor([2])]; + tensor sin_37 = expand_dims(axes = sin_37_axes_0, x = var_861_0)[name = string("sin_37")]; + tensor var_866_cast_fp16 = mul(x = var_853_cast_fp16_0, y = cos_37)[name = string("op_866_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = 
x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_853_cast_fp16_0)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_853_cast_fp16_0)[name = string("x2_17_cast_fp16")]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_877_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_877_cast_fp16")]; + bool var_879_interleave_0 = const()[name = string("op_879_interleave_0"), val = bool(false)]; + tensor var_879_cast_fp16 = concat(axis = var_38, interleave = var_879_interleave_0, values = (var_877_cast_fp16, x1_17_cast_fp16))[name = string("op_879_cast_fp16")]; + tensor var_880_cast_fp16 = mul(x = var_879_cast_fp16, y = sin_37)[name = string("op_880_cast_fp16")]; + tensor var_881_cast_fp16 = add(x = var_866_cast_fp16, y = var_880_cast_fp16)[name = string("op_881_cast_fp16")]; + tensor cos_41_axes_0 = const()[name = string("cos_41_axes_0"), val = tensor([2])]; + tensor cos_41 = expand_dims(axes = cos_41_axes_0, x = var_857_1)[name = string("cos_41")]; + tensor sin_41_axes_0 = const()[name = string("sin_41_axes_0"), val = tensor([2])]; + tensor sin_41 = expand_dims(axes = sin_41_axes_0, x = var_861_1)[name = string("sin_41")]; + tensor var_884_cast_fp16 = mul(x = var_853_cast_fp16_1, y = cos_41)[name = string("op_884_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_19_end_mask_0 = const()[name = 
string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_853_cast_fp16_1)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_853_cast_fp16_1)[name = string("x2_19_cast_fp16")]; + fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_895_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_895_cast_fp16")]; + bool var_897_interleave_0 = const()[name = string("op_897_interleave_0"), val = bool(false)]; + tensor var_897_cast_fp16 = concat(axis = var_38, interleave = var_897_interleave_0, values = (var_895_cast_fp16, x1_19_cast_fp16))[name = string("op_897_cast_fp16")]; + tensor var_898_cast_fp16 = mul(x = var_897_cast_fp16, y = sin_41)[name = string("op_898_cast_fp16")]; + tensor var_899_cast_fp16 = add(x = var_884_cast_fp16, y = var_898_cast_fp16)[name = string("op_899_cast_fp16")]; + bool query_states_5_interleave_0 = const()[name = string("query_states_5_interleave_0"), val = bool(false)]; + tensor query_states_5_cast_fp16 = concat(axis = var_38, interleave = query_states_5_interleave_0, values = (var_881_cast_fp16, var_899_cast_fp16))[name = string("query_states_5_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(75128896)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_2_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_28_cast_fp16)[name = string("linear_16_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.42p+4)]; + fp16 model_vision_tower_encoder_layers_2_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3ep+4)]; + tensor clip_31_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_16_cast_fp16)[name = string("clip_31_cast_fp16")]; + tensor var_912 = const()[name = string("op_912"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_133_cast_fp16 = reshape(shape = var_912, x = clip_31_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; + fp16 var_33_promoted_16_to_fp16 = const()[name = string("op_33_promoted_16_to_fp16"), val = fp16(0x1p+1)]; + tensor var_916_cast_fp16 = pow(x = hidden_states_133_cast_fp16, y = var_33_promoted_16_to_fp16)[name = string("op_916_cast_fp16")]; + tensor var_918_axes_0 = const()[name = string("op_918_axes_0"), val = tensor([-1])]; + bool var_918_keep_dims_0 = const()[name = string("op_918_keep_dims_0"), val = bool(true)]; + tensor var_918_cast_fp16 = reduce_mean(axes = var_918_axes_0, keep_dims = var_918_keep_dims_0, x = var_916_cast_fp16)[name = string("op_918_cast_fp16")]; + fp16 var_919_to_fp16 = const()[name = string("op_919_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_33_cast_fp16 = add(x = var_918_cast_fp16, y = var_919_to_fp16)[name = 
string("mean_squared_33_cast_fp16")]; + tensor var_921_cast_fp16 = pow(x = mean_squared_33_cast_fp16, y = var_27_to_fp16)[name = string("op_921_cast_fp16")]; + tensor normed_output_61_cast_fp16 = mul(x = hidden_states_133_cast_fp16, y = var_921_cast_fp16)[name = string("normed_output_61_cast_fp16")]; + tensor const_66_to_fp16 = const()[name = string("const_66_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76308608)))]; + tensor normed_output_63_cast_fp16 = mul(x = normed_output_61_cast_fp16, y = const_66_to_fp16)[name = string("normed_output_63_cast_fp16")]; + tensor var_941 = const()[name = string("op_941"), val = tensor([32, 32])]; + int32 var_942_axis_0 = const()[name = string("op_942_axis_0"), val = int32(-1)]; + tensor var_942_cast_fp16_0, tensor var_942_cast_fp16_1 = split(axis = var_942_axis_0, split_sizes = var_941, x = normed_output_63_cast_fp16)[name = string("op_942_cast_fp16")]; + tensor var_945 = const()[name = string("op_945"), val = tensor([32, 32])]; + int32 var_946_axis_0 = const()[name = string("op_946_axis_0"), val = int32(-1)]; + tensor var_946_0, tensor var_946_1 = split(axis = var_946_axis_0, split_sizes = var_945, x = var_160_cast_fp16)[name = string("op_946")]; + tensor var_949 = const()[name = string("op_949"), val = tensor([32, 32])]; + int32 var_950_axis_0 = const()[name = string("op_950_axis_0"), val = int32(-1)]; + tensor var_950_0, tensor var_950_1 = split(axis = var_950_axis_0, split_sizes = var_949, x = var_163_cast_fp16)[name = string("op_950")]; + tensor cos_45_axes_0 = const()[name = string("cos_45_axes_0"), val = tensor([2])]; + tensor cos_45 = expand_dims(axes = cos_45_axes_0, x = var_946_0)[name = string("cos_45")]; + tensor sin_45_axes_0 = const()[name = string("sin_45_axes_0"), val = tensor([2])]; + tensor sin_45 = expand_dims(axes = sin_45_axes_0, x = var_950_0)[name = string("sin_45")]; + tensor var_955_cast_fp16 = mul(x = var_942_cast_fp16_0, y = cos_45)[name = 
string("op_955_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_942_cast_fp16_0)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_942_cast_fp16_0)[name = string("x2_21_cast_fp16")]; + fp16 const_71_promoted_to_fp16 = const()[name = string("const_71_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_966_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_71_promoted_to_fp16)[name = string("op_966_cast_fp16")]; + bool var_968_interleave_0 = const()[name = string("op_968_interleave_0"), val = bool(false)]; + tensor var_968_cast_fp16 = concat(axis = var_38, interleave = var_968_interleave_0, values = (var_966_cast_fp16, x1_21_cast_fp16))[name = string("op_968_cast_fp16")]; + tensor var_969_cast_fp16 = mul(x = var_968_cast_fp16, y = sin_45)[name = string("op_969_cast_fp16")]; + tensor var_970_cast_fp16 = add(x = var_955_cast_fp16, y = var_969_cast_fp16)[name = string("op_970_cast_fp16")]; + tensor cos_49_axes_0 = const()[name = string("cos_49_axes_0"), val = tensor([2])]; + tensor cos_49 = expand_dims(axes = cos_49_axes_0, x = var_946_1)[name = string("cos_49")]; + tensor sin_49_axes_0 = const()[name = string("sin_49_axes_0"), val = tensor([2])]; + tensor sin_49 = expand_dims(axes = 
sin_49_axes_0, x = var_950_1)[name = string("sin_49")]; + tensor var_973_cast_fp16 = mul(x = var_942_cast_fp16_1, y = cos_49)[name = string("op_973_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_942_cast_fp16_1)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_942_cast_fp16_1)[name = string("x2_23_cast_fp16")]; + fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_984_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_984_cast_fp16")]; + bool var_986_interleave_0 = const()[name = string("op_986_interleave_0"), val = bool(false)]; + tensor var_986_cast_fp16 = concat(axis = var_38, interleave = var_986_interleave_0, values = (var_984_cast_fp16, x1_23_cast_fp16))[name = string("op_986_cast_fp16")]; + tensor var_987_cast_fp16 = mul(x = var_986_cast_fp16, y = sin_49)[name = string("op_987_cast_fp16")]; + tensor var_988_cast_fp16 = add(x = var_973_cast_fp16, y = var_987_cast_fp16)[name = string("op_988_cast_fp16")]; + bool key_states_5_interleave_0 = const()[name = string("key_states_5_interleave_0"), val = bool(false)]; + tensor key_states_5_cast_fp16 = concat(axis = var_38, interleave 
= key_states_5_interleave_0, values = (var_970_cast_fp16, var_988_cast_fp16))[name = string("key_states_5_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76308800)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_2_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_28_cast_fp16)[name = string("linear_17_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.42p+4)]; + fp16 model_vision_tower_encoder_layers_2_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3ep+4)]; + tensor clip_33_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_17_cast_fp16)[name = string("clip_33_cast_fp16")]; + tensor var_1001 = const()[name = string("op_1001"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_139_cast_fp16 = reshape(shape = var_1001, x = clip_33_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + fp16 var_33_promoted_17_to_fp16 = const()[name = string("op_33_promoted_17_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1004_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_33_promoted_17_to_fp16)[name = string("op_1004_cast_fp16")]; + tensor var_1006_axes_0 = const()[name = string("op_1006_axes_0"), val = tensor([-1])]; + bool var_1006_keep_dims_0 = const()[name = string("op_1006_keep_dims_0"), val 
= bool(true)]; + tensor var_1006_cast_fp16 = reduce_mean(axes = var_1006_axes_0, keep_dims = var_1006_keep_dims_0, x = var_1004_cast_fp16)[name = string("op_1006_cast_fp16")]; + fp16 var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_35_cast_fp16 = add(x = var_1006_cast_fp16, y = var_1007_to_fp16)[name = string("mean_squared_35_cast_fp16")]; + tensor var_1009_cast_fp16 = pow(x = mean_squared_35_cast_fp16, y = var_27_to_fp16)[name = string("op_1009_cast_fp16")]; + tensor normed_output_65_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_1009_cast_fp16)[name = string("normed_output_65_cast_fp16")]; + tensor hidden_states_145_perm_0 = const()[name = string("hidden_states_145_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(true)]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = key_states_5_cast_fp16)[name = string("transpose_149")]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = query_states_5_cast_fp16)[name = string("transpose_150")]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("matmul_2_cast_fp16")]; + tensor add_2_cast_fp16 = add(x = matmul_2_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_2_cast_fp16")]; + int32 softmax_2_axis_0 = const()[name = string("softmax_2_axis_0"), val = int32(-1)]; + tensor softmax_2_cast_fp16 = softmax(axis = softmax_2_axis_0, x = add_2_cast_fp16)[name = string("softmax_2_cast_fp16")]; + bool 
attn_output_9_transpose_x_0 = const()[name = string("attn_output_9_transpose_x_0"), val = bool(false)]; + bool attn_output_9_transpose_y_0 = const()[name = string("attn_output_9_transpose_y_0"), val = bool(false)]; + tensor hidden_states_145_cast_fp16 = transpose(perm = hidden_states_145_perm_0, x = normed_output_65_cast_fp16)[name = string("transpose_151")]; + tensor attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = softmax_2_cast_fp16, y = hidden_states_145_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_1014_perm_0 = const()[name = string("op_1014_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([1, 2304, -1])]; + tensor var_1014_cast_fp16 = transpose(perm = var_1014_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_148")]; + tensor var_1017_cast_fp16 = reshape(shape = var_1016, x = var_1014_cast_fp16)[name = string("op_1017_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.76p+1)]; + fp16 model_vision_tower_encoder_layers_2_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.72p+1)]; + tensor clip_34_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_self_attn_o_proj_input_max_promoted_to_fp16, x = var_1017_cast_fp16)[name = string("clip_34_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(77488512)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_2_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_34_cast_fp16)[name = string("linear_18_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.aep+3)]; + fp16 model_vision_tower_encoder_layers_2_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.acp+3)]; + tensor clip_35_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_18_cast_fp16)[name = string("clip_35_cast_fp16")]; + fp16 var_33_promoted_18_to_fp16 = const()[name = string("op_33_promoted_18_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1030_cast_fp16 = pow(x = clip_35_cast_fp16, y = var_33_promoted_18_to_fp16)[name = string("op_1030_cast_fp16")]; + tensor var_1032_axes_0 = const()[name = string("op_1032_axes_0"), val = tensor([-1])]; + bool var_1032_keep_dims_0 = const()[name = string("op_1032_keep_dims_0"), val = bool(true)]; + tensor var_1032_cast_fp16 = reduce_mean(axes = var_1032_axes_0, keep_dims = var_1032_keep_dims_0, x = var_1030_cast_fp16)[name = string("op_1032_cast_fp16")]; + fp16 var_1033_to_fp16 = const()[name = string("op_1033_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_37_cast_fp16 = add(x = var_1032_cast_fp16, y = var_1033_to_fp16)[name = string("mean_squared_37_cast_fp16")]; + tensor var_1035_cast_fp16 = pow(x = mean_squared_37_cast_fp16, y = var_27_to_fp16)[name = string("op_1035_cast_fp16")]; + tensor normed_output_67_cast_fp16 = mul(x = clip_35_cast_fp16, 
y = var_1035_cast_fp16)[name = string("normed_output_67_cast_fp16")]; + tensor const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78668224)))]; + tensor normed_output_69_cast_fp16 = mul(x = normed_output_67_cast_fp16, y = const_75_to_fp16)[name = string("normed_output_69_cast_fp16")]; + tensor hidden_states_157_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = normed_output_69_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; + fp16 var_33_promoted_19_to_fp16 = const()[name = string("op_33_promoted_19_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1043_cast_fp16 = pow(x = hidden_states_157_cast_fp16, y = var_33_promoted_19_to_fp16)[name = string("op_1043_cast_fp16")]; + tensor var_1045_axes_0 = const()[name = string("op_1045_axes_0"), val = tensor([-1])]; + bool var_1045_keep_dims_0 = const()[name = string("op_1045_keep_dims_0"), val = bool(true)]; + tensor var_1045_cast_fp16 = reduce_mean(axes = var_1045_axes_0, keep_dims = var_1045_keep_dims_0, x = var_1043_cast_fp16)[name = string("op_1045_cast_fp16")]; + fp16 var_1046_to_fp16 = const()[name = string("op_1046_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_39_cast_fp16 = add(x = var_1045_cast_fp16, y = var_1046_to_fp16)[name = string("mean_squared_39_cast_fp16")]; + tensor var_1048_cast_fp16 = pow(x = mean_squared_39_cast_fp16, y = var_27_to_fp16)[name = string("op_1048_cast_fp16")]; + tensor normed_output_71_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = var_1048_cast_fp16)[name = string("normed_output_71_cast_fp16")]; + tensor const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78669824)))]; + tensor normed_output_73_cast_fp16 = mul(x = normed_output_71_cast_fp16, y = const_76_to_fp16)[name = string("normed_output_73_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_2_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.38p+3)]; + fp16 model_vision_tower_encoder_layers_2_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.34p+3)]; + tensor clip_36_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_73_cast_fp16)[name = string("clip_36_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78671424)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_2_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_36_cast_fp16)[name = string("linear_19_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.9cp+3)]; + fp16 model_vision_tower_encoder_layers_2_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.98p+3)]; + tensor clip_37_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_19_cast_fp16)[name = string("clip_37_cast_fp16")]; + string var_1065_mode_0 = const()[name = string("op_1065_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor var_1065_cast_fp16 = gelu(mode = var_1065_mode_0, x = clip_37_cast_fp16)[name = string("op_1065_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83390080)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_2_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_36_cast_fp16)[name = string("linear_20_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.9cp+3)]; + fp16 model_vision_tower_encoder_layers_2_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.98p+3)]; + tensor clip_39_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_mlp_up_proj_output_max_promoted_to_fp16, x = linear_20_cast_fp16)[name = string("clip_39_cast_fp16")]; + tensor hidden_states_167_cast_fp16 = mul(x = var_1065_cast_fp16, y = clip_39_cast_fp16)[name = string("hidden_states_167_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.46p+6)]; + fp16 model_vision_tower_encoder_layers_2_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.44p+6)]; + tensor clip_40_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_2_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_167_cast_fp16)[name = string("clip_40_cast_fp16")]; + tensor model_vision_tower_encoder_layers_2_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88108736)))]; + tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_2_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_40_cast_fp16)[name = string("linear_21_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_2_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6p+6)]; + fp16 model_vision_tower_encoder_layers_2_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_2_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5ep+6)]; + tensor clip_41_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_2_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_2_mlp_down_proj_output_max_promoted_to_fp16, x = linear_21_cast_fp16)[name = string("clip_41_cast_fp16")]; + fp16 var_33_promoted_20_to_fp16 = const()[name = string("op_33_promoted_20_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1087_cast_fp16 = pow(x = clip_41_cast_fp16, y = var_33_promoted_20_to_fp16)[name = string("op_1087_cast_fp16")]; + tensor var_1089_axes_0 = const()[name = string("op_1089_axes_0"), val = tensor([-1])]; + bool var_1089_keep_dims_0 = const()[name = string("op_1089_keep_dims_0"), val = bool(true)]; + tensor var_1089_cast_fp16 = reduce_mean(axes = var_1089_axes_0, keep_dims = var_1089_keep_dims_0, x = var_1087_cast_fp16)[name = 
string("op_1089_cast_fp16")]; + fp16 var_1090_to_fp16 = const()[name = string("op_1090_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_41_cast_fp16 = add(x = var_1089_cast_fp16, y = var_1090_to_fp16)[name = string("mean_squared_41_cast_fp16")]; + tensor var_1092_cast_fp16 = pow(x = mean_squared_41_cast_fp16, y = var_27_to_fp16)[name = string("op_1092_cast_fp16")]; + tensor normed_output_75_cast_fp16 = mul(x = clip_41_cast_fp16, y = var_1092_cast_fp16)[name = string("normed_output_75_cast_fp16")]; + tensor const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92827392)))]; + tensor normed_output_77_cast_fp16 = mul(x = normed_output_75_cast_fp16, y = const_77_to_fp16)[name = string("normed_output_77_cast_fp16")]; + tensor hidden_states_177_cast_fp16 = add(x = hidden_states_157_cast_fp16, y = normed_output_77_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; + fp16 var_33_promoted_21_to_fp16 = const()[name = string("op_33_promoted_21_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1106_cast_fp16 = pow(x = hidden_states_177_cast_fp16, y = var_33_promoted_21_to_fp16)[name = string("op_1106_cast_fp16")]; + tensor var_1108_axes_0 = const()[name = string("op_1108_axes_0"), val = tensor([-1])]; + bool var_1108_keep_dims_0 = const()[name = string("op_1108_keep_dims_0"), val = bool(true)]; + tensor var_1108_cast_fp16 = reduce_mean(axes = var_1108_axes_0, keep_dims = var_1108_keep_dims_0, x = var_1106_cast_fp16)[name = string("op_1108_cast_fp16")]; + fp16 var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_43_cast_fp16 = add(x = var_1108_cast_fp16, y = var_1109_to_fp16)[name = string("mean_squared_43_cast_fp16")]; + tensor var_1111_cast_fp16 = pow(x = mean_squared_43_cast_fp16, y = var_27_to_fp16)[name = string("op_1111_cast_fp16")]; + tensor normed_output_79_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = 
var_1111_cast_fp16)[name = string("normed_output_79_cast_fp16")]; + tensor const_78_to_fp16 = const()[name = string("const_78_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92828992)))]; + tensor normed_output_81_cast_fp16 = mul(x = normed_output_79_cast_fp16, y = const_78_to_fp16)[name = string("normed_output_81_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.a4p+3)]; + fp16 model_vision_tower_encoder_layers_3_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.a2p+3)]; + tensor clip_42_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_81_cast_fp16)[name = string("clip_42_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92830592)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_3_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_42_cast_fp16)[name = string("linear_22_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 model_vision_tower_encoder_layers_3_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_3_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_43_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_22_cast_fp16)[name = string("clip_43_cast_fp16")]; + tensor var_1133 = const()[name = string("op_1133"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_185_cast_fp16 = reshape(shape = var_1133, x = clip_43_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; + fp16 var_33_promoted_22_to_fp16 = const()[name = string("op_33_promoted_22_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1137_cast_fp16 = pow(x = hidden_states_185_cast_fp16, y = var_33_promoted_22_to_fp16)[name = string("op_1137_cast_fp16")]; + tensor var_1139_axes_0 = const()[name = string("op_1139_axes_0"), val = tensor([-1])]; + bool var_1139_keep_dims_0 = const()[name = string("op_1139_keep_dims_0"), val = bool(true)]; + tensor var_1139_cast_fp16 = reduce_mean(axes = var_1139_axes_0, keep_dims = var_1139_keep_dims_0, x = var_1137_cast_fp16)[name = string("op_1139_cast_fp16")]; + fp16 var_1140_to_fp16 = const()[name = string("op_1140_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_45_cast_fp16 = add(x = var_1139_cast_fp16, y = var_1140_to_fp16)[name = string("mean_squared_45_cast_fp16")]; + tensor var_1142_cast_fp16 = pow(x = mean_squared_45_cast_fp16, y = var_27_to_fp16)[name = string("op_1142_cast_fp16")]; + tensor normed_output_83_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = var_1142_cast_fp16)[name = string("normed_output_83_cast_fp16")]; + tensor const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94010304)))]; + tensor normed_output_85_cast_fp16 = mul(x = normed_output_83_cast_fp16, y = const_81_to_fp16)[name = 
string("normed_output_85_cast_fp16")]; + tensor var_1162 = const()[name = string("op_1162"), val = tensor([32, 32])]; + int32 var_1163_axis_0 = const()[name = string("op_1163_axis_0"), val = int32(-1)]; + tensor var_1163_cast_fp16_0, tensor var_1163_cast_fp16_1 = split(axis = var_1163_axis_0, split_sizes = var_1162, x = normed_output_85_cast_fp16)[name = string("op_1163_cast_fp16")]; + tensor var_1166 = const()[name = string("op_1166"), val = tensor([32, 32])]; + int32 var_1167_axis_0 = const()[name = string("op_1167_axis_0"), val = int32(-1)]; + tensor var_1167_0, tensor var_1167_1 = split(axis = var_1167_axis_0, split_sizes = var_1166, x = var_160_cast_fp16)[name = string("op_1167")]; + tensor var_1170 = const()[name = string("op_1170"), val = tensor([32, 32])]; + int32 var_1171_axis_0 = const()[name = string("op_1171_axis_0"), val = int32(-1)]; + tensor var_1171_0, tensor var_1171_1 = split(axis = var_1171_axis_0, split_sizes = var_1170, x = var_163_cast_fp16)[name = string("op_1171")]; + tensor cos_53_axes_0 = const()[name = string("cos_53_axes_0"), val = tensor([2])]; + tensor cos_53 = expand_dims(axes = cos_53_axes_0, x = var_1167_0)[name = string("cos_53")]; + tensor sin_53_axes_0 = const()[name = string("sin_53_axes_0"), val = tensor([2])]; + tensor sin_53 = expand_dims(axes = sin_53_axes_0, x = var_1171_0)[name = string("sin_53")]; + tensor var_1176_cast_fp16 = mul(x = var_1163_cast_fp16_0, y = cos_53)[name = string("op_1176_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1163_cast_fp16_0)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = 
string("x2_25_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1163_cast_fp16_0)[name = string("x2_25_cast_fp16")]; + fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1187_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_1187_cast_fp16")]; + bool var_1189_interleave_0 = const()[name = string("op_1189_interleave_0"), val = bool(false)]; + tensor var_1189_cast_fp16 = concat(axis = var_38, interleave = var_1189_interleave_0, values = (var_1187_cast_fp16, x1_25_cast_fp16))[name = string("op_1189_cast_fp16")]; + tensor var_1190_cast_fp16 = mul(x = var_1189_cast_fp16, y = sin_53)[name = string("op_1190_cast_fp16")]; + tensor var_1191_cast_fp16 = add(x = var_1176_cast_fp16, y = var_1190_cast_fp16)[name = string("op_1191_cast_fp16")]; + tensor cos_57_axes_0 = const()[name = string("cos_57_axes_0"), val = tensor([2])]; + tensor cos_57 = expand_dims(axes = cos_57_axes_0, x = var_1167_1)[name = string("cos_57")]; + tensor sin_57_axes_0 = const()[name = string("sin_57_axes_0"), val = tensor([2])]; + tensor sin_57 = expand_dims(axes = sin_57_axes_0, x = var_1171_1)[name = string("sin_57")]; + tensor var_1194_cast_fp16 = mul(x = var_1163_cast_fp16_1, y = cos_57)[name = string("op_1194_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, 
end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1163_cast_fp16_1)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1163_cast_fp16_1)[name = string("x2_27_cast_fp16")]; + fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1205_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_1205_cast_fp16")]; + bool var_1207_interleave_0 = const()[name = string("op_1207_interleave_0"), val = bool(false)]; + tensor var_1207_cast_fp16 = concat(axis = var_38, interleave = var_1207_interleave_0, values = (var_1205_cast_fp16, x1_27_cast_fp16))[name = string("op_1207_cast_fp16")]; + tensor var_1208_cast_fp16 = mul(x = var_1207_cast_fp16, y = sin_57)[name = string("op_1208_cast_fp16")]; + tensor var_1209_cast_fp16 = add(x = var_1194_cast_fp16, y = var_1208_cast_fp16)[name = string("op_1209_cast_fp16")]; + bool query_states_7_interleave_0 = const()[name = string("query_states_7_interleave_0"), val = bool(false)]; + tensor query_states_7_cast_fp16 = concat(axis = var_38, interleave = query_states_7_interleave_0, values = (var_1191_cast_fp16, var_1209_cast_fp16))[name = string("query_states_7_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94010496)))]; + tensor linear_23_cast_fp16 = linear(bias = linear_0_bias_0, weight = 
model_vision_tower_encoder_layers_3_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_42_cast_fp16)[name = string("linear_23_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.76p+4)]; + fp16 model_vision_tower_encoder_layers_3_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.74p+4)]; + tensor clip_45_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_23_cast_fp16)[name = string("clip_45_cast_fp16")]; + tensor var_1222 = const()[name = string("op_1222"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_191_cast_fp16 = reshape(shape = var_1222, x = clip_45_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + fp16 var_33_promoted_23_to_fp16 = const()[name = string("op_33_promoted_23_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1226_cast_fp16 = pow(x = hidden_states_191_cast_fp16, y = var_33_promoted_23_to_fp16)[name = string("op_1226_cast_fp16")]; + tensor var_1228_axes_0 = const()[name = string("op_1228_axes_0"), val = tensor([-1])]; + bool var_1228_keep_dims_0 = const()[name = string("op_1228_keep_dims_0"), val = bool(true)]; + tensor var_1228_cast_fp16 = reduce_mean(axes = var_1228_axes_0, keep_dims = var_1228_keep_dims_0, x = var_1226_cast_fp16)[name = string("op_1228_cast_fp16")]; + fp16 var_1229_to_fp16 = const()[name = string("op_1229_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_47_cast_fp16 = add(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = string("mean_squared_47_cast_fp16")]; + tensor var_1231_cast_fp16 = pow(x = mean_squared_47_cast_fp16, y = var_27_to_fp16)[name = 
string("op_1231_cast_fp16")]; + tensor normed_output_87_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = var_1231_cast_fp16)[name = string("normed_output_87_cast_fp16")]; + tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95190208)))]; + tensor normed_output_89_cast_fp16 = mul(x = normed_output_87_cast_fp16, y = const_90_to_fp16)[name = string("normed_output_89_cast_fp16")]; + tensor var_1251 = const()[name = string("op_1251"), val = tensor([32, 32])]; + int32 var_1252_axis_0 = const()[name = string("op_1252_axis_0"), val = int32(-1)]; + tensor var_1252_cast_fp16_0, tensor var_1252_cast_fp16_1 = split(axis = var_1252_axis_0, split_sizes = var_1251, x = normed_output_89_cast_fp16)[name = string("op_1252_cast_fp16")]; + tensor var_1255 = const()[name = string("op_1255"), val = tensor([32, 32])]; + int32 var_1256_axis_0 = const()[name = string("op_1256_axis_0"), val = int32(-1)]; + tensor var_1256_0, tensor var_1256_1 = split(axis = var_1256_axis_0, split_sizes = var_1255, x = var_160_cast_fp16)[name = string("op_1256")]; + tensor var_1259 = const()[name = string("op_1259"), val = tensor([32, 32])]; + int32 var_1260_axis_0 = const()[name = string("op_1260_axis_0"), val = int32(-1)]; + tensor var_1260_0, tensor var_1260_1 = split(axis = var_1260_axis_0, split_sizes = var_1259, x = var_163_cast_fp16)[name = string("op_1260")]; + tensor cos_61_axes_0 = const()[name = string("cos_61_axes_0"), val = tensor([2])]; + tensor cos_61 = expand_dims(axes = cos_61_axes_0, x = var_1256_0)[name = string("cos_61")]; + tensor sin_61_axes_0 = const()[name = string("sin_61_axes_0"), val = tensor([2])]; + tensor sin_61 = expand_dims(axes = sin_61_axes_0, x = var_1260_0)[name = string("sin_61")]; + tensor var_1265_cast_fp16 = mul(x = var_1252_cast_fp16_0, y = cos_61)[name = string("op_1265_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = 
tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1252_cast_fp16_0)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1252_cast_fp16_0)[name = string("x2_29_cast_fp16")]; + fp16 const_95_promoted_to_fp16 = const()[name = string("const_95_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1276_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_95_promoted_to_fp16)[name = string("op_1276_cast_fp16")]; + bool var_1278_interleave_0 = const()[name = string("op_1278_interleave_0"), val = bool(false)]; + tensor var_1278_cast_fp16 = concat(axis = var_38, interleave = var_1278_interleave_0, values = (var_1276_cast_fp16, x1_29_cast_fp16))[name = string("op_1278_cast_fp16")]; + tensor var_1279_cast_fp16 = mul(x = var_1278_cast_fp16, y = sin_61)[name = string("op_1279_cast_fp16")]; + tensor var_1280_cast_fp16 = add(x = var_1265_cast_fp16, y = var_1279_cast_fp16)[name = string("op_1280_cast_fp16")]; + tensor cos_65_axes_0 = const()[name = string("cos_65_axes_0"), val = tensor([2])]; + tensor cos_65 = expand_dims(axes = cos_65_axes_0, x = var_1256_1)[name = string("cos_65")]; + tensor sin_65_axes_0 = const()[name = string("sin_65_axes_0"), val = tensor([2])]; + tensor sin_65 = expand_dims(axes = sin_65_axes_0, x = var_1260_1)[name = string("sin_65")]; + tensor var_1283_cast_fp16 = 
mul(x = var_1252_cast_fp16_1, y = cos_65)[name = string("op_1283_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1252_cast_fp16_1)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1252_cast_fp16_1)[name = string("x2_31_cast_fp16")]; + fp16 const_98_promoted_to_fp16 = const()[name = string("const_98_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1294_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_98_promoted_to_fp16)[name = string("op_1294_cast_fp16")]; + bool var_1296_interleave_0 = const()[name = string("op_1296_interleave_0"), val = bool(false)]; + tensor var_1296_cast_fp16 = concat(axis = var_38, interleave = var_1296_interleave_0, values = (var_1294_cast_fp16, x1_31_cast_fp16))[name = string("op_1296_cast_fp16")]; + tensor var_1297_cast_fp16 = mul(x = var_1296_cast_fp16, y = sin_65)[name = string("op_1297_cast_fp16")]; + tensor var_1298_cast_fp16 = add(x = var_1283_cast_fp16, y = var_1297_cast_fp16)[name = string("op_1298_cast_fp16")]; + bool key_states_7_interleave_0 = const()[name = string("key_states_7_interleave_0"), val = bool(false)]; + tensor key_states_7_cast_fp16 = concat(axis = var_38, interleave = key_states_7_interleave_0, values = (var_1280_cast_fp16, 
var_1298_cast_fp16))[name = string("key_states_7_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95190400)))]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_3_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_42_cast_fp16)[name = string("linear_24_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.76p+4)]; + fp16 model_vision_tower_encoder_layers_3_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.74p+4)]; + tensor clip_47_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_24_cast_fp16)[name = string("clip_47_cast_fp16")]; + tensor var_1311 = const()[name = string("op_1311"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_197_cast_fp16 = reshape(shape = var_1311, x = clip_47_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; + fp16 var_33_promoted_24_to_fp16 = const()[name = string("op_33_promoted_24_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1314_cast_fp16 = pow(x = hidden_states_197_cast_fp16, y = var_33_promoted_24_to_fp16)[name = string("op_1314_cast_fp16")]; + tensor var_1316_axes_0 = const()[name = string("op_1316_axes_0"), val = tensor([-1])]; + bool var_1316_keep_dims_0 = const()[name = string("op_1316_keep_dims_0"), val = bool(true)]; + tensor var_1316_cast_fp16 = 
reduce_mean(axes = var_1316_axes_0, keep_dims = var_1316_keep_dims_0, x = var_1314_cast_fp16)[name = string("op_1316_cast_fp16")]; + fp16 var_1317_to_fp16 = const()[name = string("op_1317_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_49_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1317_to_fp16)[name = string("mean_squared_49_cast_fp16")]; + tensor var_1319_cast_fp16 = pow(x = mean_squared_49_cast_fp16, y = var_27_to_fp16)[name = string("op_1319_cast_fp16")]; + tensor normed_output_91_cast_fp16 = mul(x = hidden_states_197_cast_fp16, y = var_1319_cast_fp16)[name = string("normed_output_91_cast_fp16")]; + tensor hidden_states_203_perm_0 = const()[name = string("hidden_states_203_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(true)]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = key_states_7_cast_fp16)[name = string("transpose_145")]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = query_states_7_cast_fp16)[name = string("transpose_146")]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("matmul_3_cast_fp16")]; + tensor add_3_cast_fp16 = add(x = matmul_3_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_3_cast_fp16")]; + int32 softmax_3_axis_0 = const()[name = string("softmax_3_axis_0"), val = int32(-1)]; + tensor softmax_3_cast_fp16 = softmax(axis = softmax_3_axis_0, x = add_3_cast_fp16)[name = string("softmax_3_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = 
string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor hidden_states_203_cast_fp16 = transpose(perm = hidden_states_203_perm_0, x = normed_output_91_cast_fp16)[name = string("transpose_147")]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = softmax_3_cast_fp16, y = hidden_states_203_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_1324_perm_0 = const()[name = string("op_1324_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1326 = const()[name = string("op_1326"), val = tensor([1, 2304, -1])]; + tensor var_1324_cast_fp16 = transpose(perm = var_1324_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_144")]; + tensor var_1327_cast_fp16 = reshape(shape = var_1326, x = var_1324_cast_fp16)[name = string("op_1327_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.6cp+1)]; + fp16 model_vision_tower_encoder_layers_3_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.6ap+1)]; + tensor clip_48_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_self_attn_o_proj_input_max_promoted_to_fp16, x = var_1327_cast_fp16)[name = string("clip_48_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96370112)))]; + 
tensor linear_25_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_3_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_48_cast_fp16)[name = string("linear_25_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.02p+3)]; + fp16 model_vision_tower_encoder_layers_3_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1p+3)]; + tensor clip_49_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_25_cast_fp16)[name = string("clip_49_cast_fp16")]; + fp16 var_33_promoted_25_to_fp16 = const()[name = string("op_33_promoted_25_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1340_cast_fp16 = pow(x = clip_49_cast_fp16, y = var_33_promoted_25_to_fp16)[name = string("op_1340_cast_fp16")]; + tensor var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor([-1])]; + bool var_1342_keep_dims_0 = const()[name = string("op_1342_keep_dims_0"), val = bool(true)]; + tensor var_1342_cast_fp16 = reduce_mean(axes = var_1342_axes_0, keep_dims = var_1342_keep_dims_0, x = var_1340_cast_fp16)[name = string("op_1342_cast_fp16")]; + fp16 var_1343_to_fp16 = const()[name = string("op_1343_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_51_cast_fp16 = add(x = var_1342_cast_fp16, y = var_1343_to_fp16)[name = string("mean_squared_51_cast_fp16")]; + tensor var_1345_cast_fp16 = pow(x = mean_squared_51_cast_fp16, y = var_27_to_fp16)[name = string("op_1345_cast_fp16")]; + tensor normed_output_93_cast_fp16 = mul(x = clip_49_cast_fp16, y = var_1345_cast_fp16)[name = string("normed_output_93_cast_fp16")]; + 
tensor const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97549824)))]; + tensor normed_output_95_cast_fp16 = mul(x = normed_output_93_cast_fp16, y = const_99_to_fp16)[name = string("normed_output_95_cast_fp16")]; + tensor hidden_states_215_cast_fp16 = add(x = hidden_states_177_cast_fp16, y = normed_output_95_cast_fp16)[name = string("hidden_states_215_cast_fp16")]; + fp16 var_33_promoted_26_to_fp16 = const()[name = string("op_33_promoted_26_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1353_cast_fp16 = pow(x = hidden_states_215_cast_fp16, y = var_33_promoted_26_to_fp16)[name = string("op_1353_cast_fp16")]; + tensor var_1355_axes_0 = const()[name = string("op_1355_axes_0"), val = tensor([-1])]; + bool var_1355_keep_dims_0 = const()[name = string("op_1355_keep_dims_0"), val = bool(true)]; + tensor var_1355_cast_fp16 = reduce_mean(axes = var_1355_axes_0, keep_dims = var_1355_keep_dims_0, x = var_1353_cast_fp16)[name = string("op_1355_cast_fp16")]; + fp16 var_1356_to_fp16 = const()[name = string("op_1356_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_53_cast_fp16 = add(x = var_1355_cast_fp16, y = var_1356_to_fp16)[name = string("mean_squared_53_cast_fp16")]; + tensor var_1358_cast_fp16 = pow(x = mean_squared_53_cast_fp16, y = var_27_to_fp16)[name = string("op_1358_cast_fp16")]; + tensor normed_output_97_cast_fp16 = mul(x = hidden_states_215_cast_fp16, y = var_1358_cast_fp16)[name = string("normed_output_97_cast_fp16")]; + tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97551424)))]; + tensor normed_output_99_cast_fp16 = mul(x = normed_output_97_cast_fp16, y = const_100_to_fp16)[name = string("normed_output_99_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_3_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.8cp+3)]; + fp16 model_vision_tower_encoder_layers_3_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.88p+3)]; + tensor clip_50_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_99_cast_fp16)[name = string("clip_50_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97553024)))]; + tensor linear_26_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_3_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_50_cast_fp16)[name = string("linear_26_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.02p+4)]; + fp16 model_vision_tower_encoder_layers_3_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1p+4)]; + tensor clip_51_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_26_cast_fp16)[name = string("clip_51_cast_fp16")]; + string var_1375_mode_0 = const()[name = string("op_1375_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_1375_cast_fp16 = gelu(mode = var_1375_mode_0, x = 
clip_51_cast_fp16)[name = string("op_1375_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102271680)))]; + tensor linear_27_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_3_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_50_cast_fp16)[name = string("linear_27_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.02p+4)]; + fp16 model_vision_tower_encoder_layers_3_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1p+4)]; + tensor clip_53_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_mlp_up_proj_output_max_promoted_to_fp16, x = linear_27_cast_fp16)[name = string("clip_53_cast_fp16")]; + tensor hidden_states_225_cast_fp16 = mul(x = var_1375_cast_fp16, y = clip_53_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.5ep+6)]; + fp16 model_vision_tower_encoder_layers_3_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.5cp+6)]; + tensor clip_54_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_mlp_down_proj_input_min_promoted_to_fp16, beta = 
model_vision_tower_encoder_layers_3_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_225_cast_fp16)[name = string("clip_54_cast_fp16")]; + tensor model_vision_tower_encoder_layers_3_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106990336)))]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_3_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_54_cast_fp16)[name = string("linear_28_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_3_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.f6p+5)]; + fp16 model_vision_tower_encoder_layers_3_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_3_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.f2p+5)]; + tensor clip_55_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_3_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_3_mlp_down_proj_output_max_promoted_to_fp16, x = linear_28_cast_fp16)[name = string("clip_55_cast_fp16")]; + fp16 var_33_promoted_27_to_fp16 = const()[name = string("op_33_promoted_27_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1397_cast_fp16 = pow(x = clip_55_cast_fp16, y = var_33_promoted_27_to_fp16)[name = string("op_1397_cast_fp16")]; + tensor var_1399_axes_0 = const()[name = string("op_1399_axes_0"), val = tensor([-1])]; + bool var_1399_keep_dims_0 = const()[name = string("op_1399_keep_dims_0"), val = bool(true)]; + tensor var_1399_cast_fp16 = reduce_mean(axes = var_1399_axes_0, keep_dims = var_1399_keep_dims_0, x = var_1397_cast_fp16)[name = string("op_1399_cast_fp16")]; + fp16 var_1400_to_fp16 = const()[name = 
string("op_1400_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_55_cast_fp16 = add(x = var_1399_cast_fp16, y = var_1400_to_fp16)[name = string("mean_squared_55_cast_fp16")]; + tensor var_1402_cast_fp16 = pow(x = mean_squared_55_cast_fp16, y = var_27_to_fp16)[name = string("op_1402_cast_fp16")]; + tensor normed_output_101_cast_fp16 = mul(x = clip_55_cast_fp16, y = var_1402_cast_fp16)[name = string("normed_output_101_cast_fp16")]; + tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111708992)))]; + tensor normed_output_103_cast_fp16 = mul(x = normed_output_101_cast_fp16, y = const_101_to_fp16)[name = string("normed_output_103_cast_fp16")]; + tensor hidden_states_235_cast_fp16 = add(x = hidden_states_215_cast_fp16, y = normed_output_103_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; + fp16 var_33_promoted_28_to_fp16 = const()[name = string("op_33_promoted_28_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1416_cast_fp16 = pow(x = hidden_states_235_cast_fp16, y = var_33_promoted_28_to_fp16)[name = string("op_1416_cast_fp16")]; + tensor var_1418_axes_0 = const()[name = string("op_1418_axes_0"), val = tensor([-1])]; + bool var_1418_keep_dims_0 = const()[name = string("op_1418_keep_dims_0"), val = bool(true)]; + tensor var_1418_cast_fp16 = reduce_mean(axes = var_1418_axes_0, keep_dims = var_1418_keep_dims_0, x = var_1416_cast_fp16)[name = string("op_1418_cast_fp16")]; + fp16 var_1419_to_fp16 = const()[name = string("op_1419_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_57_cast_fp16 = add(x = var_1418_cast_fp16, y = var_1419_to_fp16)[name = string("mean_squared_57_cast_fp16")]; + tensor var_1421_cast_fp16 = pow(x = mean_squared_57_cast_fp16, y = var_27_to_fp16)[name = string("op_1421_cast_fp16")]; + tensor normed_output_105_cast_fp16 = mul(x = hidden_states_235_cast_fp16, y = var_1421_cast_fp16)[name = 
string("normed_output_105_cast_fp16")]; + tensor const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111710592)))]; + tensor normed_output_107_cast_fp16 = mul(x = normed_output_105_cast_fp16, y = const_102_to_fp16)[name = string("normed_output_107_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.9ap+3)]; + fp16 model_vision_tower_encoder_layers_4_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.96p+3)]; + tensor clip_56_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_107_cast_fp16)[name = string("clip_56_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111712192)))]; + tensor linear_29_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_4_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_56_cast_fp16)[name = string("linear_29_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.32p+4)]; + fp16 model_vision_tower_encoder_layers_4_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_4_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3p+4)]; + tensor clip_57_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_29_cast_fp16)[name = string("clip_57_cast_fp16")]; + tensor var_1443 = const()[name = string("op_1443"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_243_cast_fp16 = reshape(shape = var_1443, x = clip_57_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; + fp16 var_33_promoted_29_to_fp16 = const()[name = string("op_33_promoted_29_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1447_cast_fp16 = pow(x = hidden_states_243_cast_fp16, y = var_33_promoted_29_to_fp16)[name = string("op_1447_cast_fp16")]; + tensor var_1449_axes_0 = const()[name = string("op_1449_axes_0"), val = tensor([-1])]; + bool var_1449_keep_dims_0 = const()[name = string("op_1449_keep_dims_0"), val = bool(true)]; + tensor var_1449_cast_fp16 = reduce_mean(axes = var_1449_axes_0, keep_dims = var_1449_keep_dims_0, x = var_1447_cast_fp16)[name = string("op_1449_cast_fp16")]; + fp16 var_1450_to_fp16 = const()[name = string("op_1450_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_59_cast_fp16 = add(x = var_1449_cast_fp16, y = var_1450_to_fp16)[name = string("mean_squared_59_cast_fp16")]; + tensor var_1452_cast_fp16 = pow(x = mean_squared_59_cast_fp16, y = var_27_to_fp16)[name = string("op_1452_cast_fp16")]; + tensor normed_output_109_cast_fp16 = mul(x = hidden_states_243_cast_fp16, y = var_1452_cast_fp16)[name = string("normed_output_109_cast_fp16")]; + tensor const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112891904)))]; + tensor normed_output_111_cast_fp16 = mul(x = normed_output_109_cast_fp16, y = const_105_to_fp16)[name = 
string("normed_output_111_cast_fp16")]; + tensor var_1472 = const()[name = string("op_1472"), val = tensor([32, 32])]; + int32 var_1473_axis_0 = const()[name = string("op_1473_axis_0"), val = int32(-1)]; + tensor var_1473_cast_fp16_0, tensor var_1473_cast_fp16_1 = split(axis = var_1473_axis_0, split_sizes = var_1472, x = normed_output_111_cast_fp16)[name = string("op_1473_cast_fp16")]; + tensor var_1476 = const()[name = string("op_1476"), val = tensor([32, 32])]; + int32 var_1477_axis_0 = const()[name = string("op_1477_axis_0"), val = int32(-1)]; + tensor var_1477_0, tensor var_1477_1 = split(axis = var_1477_axis_0, split_sizes = var_1476, x = var_160_cast_fp16)[name = string("op_1477")]; + tensor var_1480 = const()[name = string("op_1480"), val = tensor([32, 32])]; + int32 var_1481_axis_0 = const()[name = string("op_1481_axis_0"), val = int32(-1)]; + tensor var_1481_0, tensor var_1481_1 = split(axis = var_1481_axis_0, split_sizes = var_1480, x = var_163_cast_fp16)[name = string("op_1481")]; + tensor cos_69_axes_0 = const()[name = string("cos_69_axes_0"), val = tensor([2])]; + tensor cos_69 = expand_dims(axes = cos_69_axes_0, x = var_1477_0)[name = string("cos_69")]; + tensor sin_69_axes_0 = const()[name = string("sin_69_axes_0"), val = tensor([2])]; + tensor sin_69 = expand_dims(axes = sin_69_axes_0, x = var_1481_0)[name = string("sin_69")]; + tensor var_1486_cast_fp16 = mul(x = var_1473_cast_fp16_0, y = cos_69)[name = string("op_1486_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1473_cast_fp16_0)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = 
string("x2_33_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1473_cast_fp16_0)[name = string("x2_33_cast_fp16")]; + fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1497_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_110_promoted_to_fp16)[name = string("op_1497_cast_fp16")]; + bool var_1499_interleave_0 = const()[name = string("op_1499_interleave_0"), val = bool(false)]; + tensor var_1499_cast_fp16 = concat(axis = var_38, interleave = var_1499_interleave_0, values = (var_1497_cast_fp16, x1_33_cast_fp16))[name = string("op_1499_cast_fp16")]; + tensor var_1500_cast_fp16 = mul(x = var_1499_cast_fp16, y = sin_69)[name = string("op_1500_cast_fp16")]; + tensor var_1501_cast_fp16 = add(x = var_1486_cast_fp16, y = var_1500_cast_fp16)[name = string("op_1501_cast_fp16")]; + tensor cos_73_axes_0 = const()[name = string("cos_73_axes_0"), val = tensor([2])]; + tensor cos_73 = expand_dims(axes = cos_73_axes_0, x = var_1477_1)[name = string("cos_73")]; + tensor sin_73_axes_0 = const()[name = string("sin_73_axes_0"), val = tensor([2])]; + tensor sin_73 = expand_dims(axes = sin_73_axes_0, x = var_1481_1)[name = string("sin_73")]; + tensor var_1504_cast_fp16 = mul(x = var_1473_cast_fp16_1, y = cos_73)[name = string("op_1504_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = 
x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_1473_cast_fp16_1)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_1473_cast_fp16_1)[name = string("x2_35_cast_fp16")]; + fp16 const_113_promoted_to_fp16 = const()[name = string("const_113_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1515_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_113_promoted_to_fp16)[name = string("op_1515_cast_fp16")]; + bool var_1517_interleave_0 = const()[name = string("op_1517_interleave_0"), val = bool(false)]; + tensor var_1517_cast_fp16 = concat(axis = var_38, interleave = var_1517_interleave_0, values = (var_1515_cast_fp16, x1_35_cast_fp16))[name = string("op_1517_cast_fp16")]; + tensor var_1518_cast_fp16 = mul(x = var_1517_cast_fp16, y = sin_73)[name = string("op_1518_cast_fp16")]; + tensor var_1519_cast_fp16 = add(x = var_1504_cast_fp16, y = var_1518_cast_fp16)[name = string("op_1519_cast_fp16")]; + bool query_states_9_interleave_0 = const()[name = string("query_states_9_interleave_0"), val = bool(false)]; + tensor query_states_9_cast_fp16 = concat(axis = var_38, interleave = query_states_9_interleave_0, values = (var_1501_cast_fp16, var_1519_cast_fp16))[name = string("query_states_9_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112892096)))]; + tensor linear_30_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_4_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_56_cast_fp16)[name = string("linear_30_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 model_vision_tower_encoder_layers_4_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_59_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_30_cast_fp16)[name = string("clip_59_cast_fp16")]; + tensor var_1532 = const()[name = string("op_1532"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_249_cast_fp16 = reshape(shape = var_1532, x = clip_59_cast_fp16)[name = string("hidden_states_249_cast_fp16")]; + fp16 var_33_promoted_30_to_fp16 = const()[name = string("op_33_promoted_30_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1536_cast_fp16 = pow(x = hidden_states_249_cast_fp16, y = var_33_promoted_30_to_fp16)[name = string("op_1536_cast_fp16")]; + tensor var_1538_axes_0 = const()[name = string("op_1538_axes_0"), val = tensor([-1])]; + bool var_1538_keep_dims_0 = const()[name = string("op_1538_keep_dims_0"), val = bool(true)]; + tensor var_1538_cast_fp16 = reduce_mean(axes = var_1538_axes_0, keep_dims = var_1538_keep_dims_0, x = var_1536_cast_fp16)[name = string("op_1538_cast_fp16")]; + fp16 var_1539_to_fp16 = const()[name = string("op_1539_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_61_cast_fp16 = add(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = string("mean_squared_61_cast_fp16")]; + tensor var_1541_cast_fp16 = pow(x = mean_squared_61_cast_fp16, y = 
var_27_to_fp16)[name = string("op_1541_cast_fp16")]; + tensor normed_output_113_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = var_1541_cast_fp16)[name = string("normed_output_113_cast_fp16")]; + tensor const_114_to_fp16 = const()[name = string("const_114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114071808)))]; + tensor normed_output_115_cast_fp16 = mul(x = normed_output_113_cast_fp16, y = const_114_to_fp16)[name = string("normed_output_115_cast_fp16")]; + tensor var_1561 = const()[name = string("op_1561"), val = tensor([32, 32])]; + int32 var_1562_axis_0 = const()[name = string("op_1562_axis_0"), val = int32(-1)]; + tensor var_1562_cast_fp16_0, tensor var_1562_cast_fp16_1 = split(axis = var_1562_axis_0, split_sizes = var_1561, x = normed_output_115_cast_fp16)[name = string("op_1562_cast_fp16")]; + tensor var_1565 = const()[name = string("op_1565"), val = tensor([32, 32])]; + int32 var_1566_axis_0 = const()[name = string("op_1566_axis_0"), val = int32(-1)]; + tensor var_1566_0, tensor var_1566_1 = split(axis = var_1566_axis_0, split_sizes = var_1565, x = var_160_cast_fp16)[name = string("op_1566")]; + tensor var_1569 = const()[name = string("op_1569"), val = tensor([32, 32])]; + int32 var_1570_axis_0 = const()[name = string("op_1570_axis_0"), val = int32(-1)]; + tensor var_1570_0, tensor var_1570_1 = split(axis = var_1570_axis_0, split_sizes = var_1569, x = var_163_cast_fp16)[name = string("op_1570")]; + tensor cos_77_axes_0 = const()[name = string("cos_77_axes_0"), val = tensor([2])]; + tensor cos_77 = expand_dims(axes = cos_77_axes_0, x = var_1566_0)[name = string("cos_77")]; + tensor sin_77_axes_0 = const()[name = string("sin_77_axes_0"), val = tensor([2])]; + tensor sin_77 = expand_dims(axes = sin_77_axes_0, x = var_1570_0)[name = string("sin_77")]; + tensor var_1575_cast_fp16 = mul(x = var_1562_cast_fp16_0, y = cos_77)[name = string("op_1575_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = 
string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_1562_cast_fp16_0)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_1562_cast_fp16_0)[name = string("x2_37_cast_fp16")]; + fp16 const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1586_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_119_promoted_to_fp16)[name = string("op_1586_cast_fp16")]; + bool var_1588_interleave_0 = const()[name = string("op_1588_interleave_0"), val = bool(false)]; + tensor var_1588_cast_fp16 = concat(axis = var_38, interleave = var_1588_interleave_0, values = (var_1586_cast_fp16, x1_37_cast_fp16))[name = string("op_1588_cast_fp16")]; + tensor var_1589_cast_fp16 = mul(x = var_1588_cast_fp16, y = sin_77)[name = string("op_1589_cast_fp16")]; + tensor var_1590_cast_fp16 = add(x = var_1575_cast_fp16, y = var_1589_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor cos_81_axes_0 = const()[name = string("cos_81_axes_0"), val = tensor([2])]; + tensor cos_81 = expand_dims(axes = cos_81_axes_0, x = var_1566_1)[name = string("cos_81")]; + tensor sin_81_axes_0 = const()[name = string("sin_81_axes_0"), val = tensor([2])]; + tensor sin_81 = expand_dims(axes = sin_81_axes_0, x = var_1570_1)[name = 
string("sin_81")]; + tensor var_1593_cast_fp16 = mul(x = var_1562_cast_fp16_1, y = cos_81)[name = string("op_1593_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_1562_cast_fp16_1)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_1562_cast_fp16_1)[name = string("x2_39_cast_fp16")]; + fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1604_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_1604_cast_fp16")]; + bool var_1606_interleave_0 = const()[name = string("op_1606_interleave_0"), val = bool(false)]; + tensor var_1606_cast_fp16 = concat(axis = var_38, interleave = var_1606_interleave_0, values = (var_1604_cast_fp16, x1_39_cast_fp16))[name = string("op_1606_cast_fp16")]; + tensor var_1607_cast_fp16 = mul(x = var_1606_cast_fp16, y = sin_81)[name = string("op_1607_cast_fp16")]; + tensor var_1608_cast_fp16 = add(x = var_1593_cast_fp16, y = var_1607_cast_fp16)[name = string("op_1608_cast_fp16")]; + bool key_states_9_interleave_0 = const()[name = string("key_states_9_interleave_0"), val = bool(false)]; + tensor key_states_9_cast_fp16 = concat(axis = var_38, interleave = 
key_states_9_interleave_0, values = (var_1590_cast_fp16, var_1608_cast_fp16))[name = string("key_states_9_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114072000)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_4_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_56_cast_fp16)[name = string("linear_31_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 model_vision_tower_encoder_layers_4_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_61_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_31_cast_fp16)[name = string("clip_61_cast_fp16")]; + tensor var_1621 = const()[name = string("op_1621"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_255_cast_fp16 = reshape(shape = var_1621, x = clip_61_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + fp16 var_33_promoted_31_to_fp16 = const()[name = string("op_33_promoted_31_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1624_cast_fp16 = pow(x = hidden_states_255_cast_fp16, y = var_33_promoted_31_to_fp16)[name = string("op_1624_cast_fp16")]; + tensor var_1626_axes_0 = const()[name = string("op_1626_axes_0"), val = tensor([-1])]; + bool var_1626_keep_dims_0 = const()[name = string("op_1626_keep_dims_0"), val 
= bool(true)]; + tensor var_1626_cast_fp16 = reduce_mean(axes = var_1626_axes_0, keep_dims = var_1626_keep_dims_0, x = var_1624_cast_fp16)[name = string("op_1626_cast_fp16")]; + fp16 var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_63_cast_fp16 = add(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = string("mean_squared_63_cast_fp16")]; + tensor var_1629_cast_fp16 = pow(x = mean_squared_63_cast_fp16, y = var_27_to_fp16)[name = string("op_1629_cast_fp16")]; + tensor normed_output_117_cast_fp16 = mul(x = hidden_states_255_cast_fp16, y = var_1629_cast_fp16)[name = string("normed_output_117_cast_fp16")]; + tensor hidden_states_261_perm_0 = const()[name = string("hidden_states_261_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(true)]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = key_states_9_cast_fp16)[name = string("transpose_141")]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = query_states_9_cast_fp16)[name = string("transpose_142")]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("matmul_4_cast_fp16")]; + tensor add_4_cast_fp16 = add(x = matmul_4_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_4_cast_fp16")]; + int32 softmax_4_axis_0 = const()[name = string("softmax_4_axis_0"), val = int32(-1)]; + tensor softmax_4_cast_fp16 = softmax(axis = softmax_4_axis_0, x = add_4_cast_fp16)[name = string("softmax_4_cast_fp16")]; + bool 
attn_output_17_transpose_x_0 = const()[name = string("attn_output_17_transpose_x_0"), val = bool(false)]; + bool attn_output_17_transpose_y_0 = const()[name = string("attn_output_17_transpose_y_0"), val = bool(false)]; + tensor hidden_states_261_cast_fp16 = transpose(perm = hidden_states_261_perm_0, x = normed_output_117_cast_fp16)[name = string("transpose_143")]; + tensor attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = softmax_4_cast_fp16, y = hidden_states_261_cast_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor var_1634_perm_0 = const()[name = string("op_1634_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1636 = const()[name = string("op_1636"), val = tensor([1, 2304, -1])]; + tensor var_1634_cast_fp16 = transpose(perm = var_1634_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_140")]; + tensor var_1637_cast_fp16 = reshape(shape = var_1636, x = var_1634_cast_fp16)[name = string("op_1637_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.32p+1)]; + fp16 model_vision_tower_encoder_layers_4_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.3p+1)]; + tensor clip_62_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_self_attn_o_proj_input_max_promoted_to_fp16, x = var_1637_cast_fp16)[name = string("clip_62_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(115251712)))]; + tensor linear_32_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_4_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_62_cast_fp16)[name = string("linear_32_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.72p+2)]; + fp16 model_vision_tower_encoder_layers_4_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.6ep+2)]; + tensor clip_63_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_32_cast_fp16)[name = string("clip_63_cast_fp16")]; + fp16 var_33_promoted_32_to_fp16 = const()[name = string("op_33_promoted_32_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1650_cast_fp16 = pow(x = clip_63_cast_fp16, y = var_33_promoted_32_to_fp16)[name = string("op_1650_cast_fp16")]; + tensor var_1652_axes_0 = const()[name = string("op_1652_axes_0"), val = tensor([-1])]; + bool var_1652_keep_dims_0 = const()[name = string("op_1652_keep_dims_0"), val = bool(true)]; + tensor var_1652_cast_fp16 = reduce_mean(axes = var_1652_axes_0, keep_dims = var_1652_keep_dims_0, x = var_1650_cast_fp16)[name = string("op_1652_cast_fp16")]; + fp16 var_1653_to_fp16 = const()[name = string("op_1653_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_65_cast_fp16 = add(x = var_1652_cast_fp16, y = var_1653_to_fp16)[name = string("mean_squared_65_cast_fp16")]; + tensor var_1655_cast_fp16 = pow(x = mean_squared_65_cast_fp16, y = var_27_to_fp16)[name = string("op_1655_cast_fp16")]; + tensor normed_output_119_cast_fp16 = mul(x = 
clip_63_cast_fp16, y = var_1655_cast_fp16)[name = string("normed_output_119_cast_fp16")]; + tensor const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116431424)))]; + tensor normed_output_121_cast_fp16 = mul(x = normed_output_119_cast_fp16, y = const_123_to_fp16)[name = string("normed_output_121_cast_fp16")]; + tensor hidden_states_273_cast_fp16 = add(x = hidden_states_235_cast_fp16, y = normed_output_121_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; + fp16 var_33_promoted_33_to_fp16 = const()[name = string("op_33_promoted_33_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1663_cast_fp16 = pow(x = hidden_states_273_cast_fp16, y = var_33_promoted_33_to_fp16)[name = string("op_1663_cast_fp16")]; + tensor var_1665_axes_0 = const()[name = string("op_1665_axes_0"), val = tensor([-1])]; + bool var_1665_keep_dims_0 = const()[name = string("op_1665_keep_dims_0"), val = bool(true)]; + tensor var_1665_cast_fp16 = reduce_mean(axes = var_1665_axes_0, keep_dims = var_1665_keep_dims_0, x = var_1663_cast_fp16)[name = string("op_1665_cast_fp16")]; + fp16 var_1666_to_fp16 = const()[name = string("op_1666_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_67_cast_fp16 = add(x = var_1665_cast_fp16, y = var_1666_to_fp16)[name = string("mean_squared_67_cast_fp16")]; + tensor var_1668_cast_fp16 = pow(x = mean_squared_67_cast_fp16, y = var_27_to_fp16)[name = string("op_1668_cast_fp16")]; + tensor normed_output_123_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = var_1668_cast_fp16)[name = string("normed_output_123_cast_fp16")]; + tensor const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116433024)))]; + tensor normed_output_125_cast_fp16 = mul(x = normed_output_123_cast_fp16, y = const_124_to_fp16)[name = string("normed_output_125_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_4_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.9ap+3)]; + fp16 model_vision_tower_encoder_layers_4_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.96p+3)]; + tensor clip_64_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_125_cast_fp16)[name = string("clip_64_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116434624)))]; + tensor linear_33_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_4_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_64_cast_fp16)[name = string("linear_33_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.16p+4)]; + fp16 model_vision_tower_encoder_layers_4_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.14p+4)]; + tensor clip_65_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_33_cast_fp16)[name = string("clip_65_cast_fp16")]; + string var_1685_mode_0 = const()[name = string("op_1685_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor var_1685_cast_fp16 = gelu(mode = var_1685_mode_0, x = clip_65_cast_fp16)[name = string("op_1685_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121153280)))]; + tensor linear_34_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_4_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_64_cast_fp16)[name = string("linear_34_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.16p+4)]; + fp16 model_vision_tower_encoder_layers_4_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.14p+4)]; + tensor clip_67_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_mlp_up_proj_output_max_promoted_to_fp16, x = linear_34_cast_fp16)[name = string("clip_67_cast_fp16")]; + tensor hidden_states_283_cast_fp16 = mul(x = var_1685_cast_fp16, y = clip_67_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.6cp+6)]; + fp16 model_vision_tower_encoder_layers_4_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.68p+6)]; + tensor clip_68_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_4_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_283_cast_fp16)[name = string("clip_68_cast_fp16")]; + tensor model_vision_tower_encoder_layers_4_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125871936)))]; + tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_4_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_68_cast_fp16)[name = string("linear_35_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_4_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.28p+6)]; + fp16 model_vision_tower_encoder_layers_4_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_4_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.26p+6)]; + tensor clip_69_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_4_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_4_mlp_down_proj_output_max_promoted_to_fp16, x = linear_35_cast_fp16)[name = string("clip_69_cast_fp16")]; + fp16 var_33_promoted_34_to_fp16 = const()[name = string("op_33_promoted_34_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1707_cast_fp16 = pow(x = clip_69_cast_fp16, y = var_33_promoted_34_to_fp16)[name = string("op_1707_cast_fp16")]; + tensor var_1709_axes_0 = const()[name = string("op_1709_axes_0"), val = tensor([-1])]; + bool var_1709_keep_dims_0 = const()[name = string("op_1709_keep_dims_0"), val = bool(true)]; + tensor var_1709_cast_fp16 = reduce_mean(axes = var_1709_axes_0, keep_dims = var_1709_keep_dims_0, x = var_1707_cast_fp16)[name = 
string("op_1709_cast_fp16")]; + fp16 var_1710_to_fp16 = const()[name = string("op_1710_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_69_cast_fp16 = add(x = var_1709_cast_fp16, y = var_1710_to_fp16)[name = string("mean_squared_69_cast_fp16")]; + tensor var_1712_cast_fp16 = pow(x = mean_squared_69_cast_fp16, y = var_27_to_fp16)[name = string("op_1712_cast_fp16")]; + tensor normed_output_127_cast_fp16 = mul(x = clip_69_cast_fp16, y = var_1712_cast_fp16)[name = string("normed_output_127_cast_fp16")]; + tensor const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130590592)))]; + tensor normed_output_129_cast_fp16 = mul(x = normed_output_127_cast_fp16, y = const_125_to_fp16)[name = string("normed_output_129_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = add(x = hidden_states_273_cast_fp16, y = normed_output_129_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + fp16 var_33_promoted_35_to_fp16 = const()[name = string("op_33_promoted_35_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1726_cast_fp16 = pow(x = hidden_states_293_cast_fp16, y = var_33_promoted_35_to_fp16)[name = string("op_1726_cast_fp16")]; + tensor var_1728_axes_0 = const()[name = string("op_1728_axes_0"), val = tensor([-1])]; + bool var_1728_keep_dims_0 = const()[name = string("op_1728_keep_dims_0"), val = bool(true)]; + tensor var_1728_cast_fp16 = reduce_mean(axes = var_1728_axes_0, keep_dims = var_1728_keep_dims_0, x = var_1726_cast_fp16)[name = string("op_1728_cast_fp16")]; + fp16 var_1729_to_fp16 = const()[name = string("op_1729_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_71_cast_fp16 = add(x = var_1728_cast_fp16, y = var_1729_to_fp16)[name = string("mean_squared_71_cast_fp16")]; + tensor var_1731_cast_fp16 = pow(x = mean_squared_71_cast_fp16, y = var_27_to_fp16)[name = string("op_1731_cast_fp16")]; + tensor normed_output_131_cast_fp16 = mul(x = 
hidden_states_293_cast_fp16, y = var_1731_cast_fp16)[name = string("normed_output_131_cast_fp16")]; + tensor const_126_to_fp16 = const()[name = string("const_126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130592192)))]; + tensor normed_output_133_cast_fp16 = mul(x = normed_output_131_cast_fp16, y = const_126_to_fp16)[name = string("normed_output_133_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.84p+3)]; + fp16 model_vision_tower_encoder_layers_5_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.82p+3)]; + tensor clip_70_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_133_cast_fp16)[name = string("clip_70_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130593792)))]; + tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_5_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_70_cast_fp16)[name = string("linear_36_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.fcp+3)]; + fp16 model_vision_tower_encoder_layers_5_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_5_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.f8p+3)]; + tensor clip_71_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_36_cast_fp16)[name = string("clip_71_cast_fp16")]; + tensor var_1753 = const()[name = string("op_1753"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_301_cast_fp16 = reshape(shape = var_1753, x = clip_71_cast_fp16)[name = string("hidden_states_301_cast_fp16")]; + fp16 var_33_promoted_36_to_fp16 = const()[name = string("op_33_promoted_36_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1757_cast_fp16 = pow(x = hidden_states_301_cast_fp16, y = var_33_promoted_36_to_fp16)[name = string("op_1757_cast_fp16")]; + tensor var_1759_axes_0 = const()[name = string("op_1759_axes_0"), val = tensor([-1])]; + bool var_1759_keep_dims_0 = const()[name = string("op_1759_keep_dims_0"), val = bool(true)]; + tensor var_1759_cast_fp16 = reduce_mean(axes = var_1759_axes_0, keep_dims = var_1759_keep_dims_0, x = var_1757_cast_fp16)[name = string("op_1759_cast_fp16")]; + fp16 var_1760_to_fp16 = const()[name = string("op_1760_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_73_cast_fp16 = add(x = var_1759_cast_fp16, y = var_1760_to_fp16)[name = string("mean_squared_73_cast_fp16")]; + tensor var_1762_cast_fp16 = pow(x = mean_squared_73_cast_fp16, y = var_27_to_fp16)[name = string("op_1762_cast_fp16")]; + tensor normed_output_135_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = var_1762_cast_fp16)[name = string("normed_output_135_cast_fp16")]; + tensor const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131773504)))]; + tensor normed_output_137_cast_fp16 = mul(x = normed_output_135_cast_fp16, y = const_129_to_fp16)[name = 
string("normed_output_137_cast_fp16")]; + tensor var_1782 = const()[name = string("op_1782"), val = tensor([32, 32])]; + int32 var_1783_axis_0 = const()[name = string("op_1783_axis_0"), val = int32(-1)]; + tensor var_1783_cast_fp16_0, tensor var_1783_cast_fp16_1 = split(axis = var_1783_axis_0, split_sizes = var_1782, x = normed_output_137_cast_fp16)[name = string("op_1783_cast_fp16")]; + tensor var_1786 = const()[name = string("op_1786"), val = tensor([32, 32])]; + int32 var_1787_axis_0 = const()[name = string("op_1787_axis_0"), val = int32(-1)]; + tensor var_1787_0, tensor var_1787_1 = split(axis = var_1787_axis_0, split_sizes = var_1786, x = var_160_cast_fp16)[name = string("op_1787")]; + tensor var_1790 = const()[name = string("op_1790"), val = tensor([32, 32])]; + int32 var_1791_axis_0 = const()[name = string("op_1791_axis_0"), val = int32(-1)]; + tensor var_1791_0, tensor var_1791_1 = split(axis = var_1791_axis_0, split_sizes = var_1790, x = var_163_cast_fp16)[name = string("op_1791")]; + tensor cos_85_axes_0 = const()[name = string("cos_85_axes_0"), val = tensor([2])]; + tensor cos_85 = expand_dims(axes = cos_85_axes_0, x = var_1787_0)[name = string("cos_85")]; + tensor sin_85_axes_0 = const()[name = string("sin_85_axes_0"), val = tensor([2])]; + tensor sin_85 = expand_dims(axes = sin_85_axes_0, x = var_1791_0)[name = string("sin_85")]; + tensor var_1796_cast_fp16 = mul(x = var_1783_cast_fp16_0, y = cos_85)[name = string("op_1796_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_1783_cast_fp16_0)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = 
string("x2_41_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_1783_cast_fp16_0)[name = string("x2_41_cast_fp16")]; + fp16 const_134_promoted_to_fp16 = const()[name = string("const_134_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1807_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_134_promoted_to_fp16)[name = string("op_1807_cast_fp16")]; + bool var_1809_interleave_0 = const()[name = string("op_1809_interleave_0"), val = bool(false)]; + tensor var_1809_cast_fp16 = concat(axis = var_38, interleave = var_1809_interleave_0, values = (var_1807_cast_fp16, x1_41_cast_fp16))[name = string("op_1809_cast_fp16")]; + tensor var_1810_cast_fp16 = mul(x = var_1809_cast_fp16, y = sin_85)[name = string("op_1810_cast_fp16")]; + tensor var_1811_cast_fp16 = add(x = var_1796_cast_fp16, y = var_1810_cast_fp16)[name = string("op_1811_cast_fp16")]; + tensor cos_89_axes_0 = const()[name = string("cos_89_axes_0"), val = tensor([2])]; + tensor cos_89 = expand_dims(axes = cos_89_axes_0, x = var_1787_1)[name = string("cos_89")]; + tensor sin_89_axes_0 = const()[name = string("sin_89_axes_0"), val = tensor([2])]; + tensor sin_89 = expand_dims(axes = sin_89_axes_0, x = var_1791_1)[name = string("sin_89")]; + tensor var_1814_cast_fp16 = mul(x = var_1783_cast_fp16_1, y = cos_89)[name = string("op_1814_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = 
x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_1783_cast_fp16_1)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_1783_cast_fp16_1)[name = string("x2_43_cast_fp16")]; + fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1825_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_137_promoted_to_fp16)[name = string("op_1825_cast_fp16")]; + bool var_1827_interleave_0 = const()[name = string("op_1827_interleave_0"), val = bool(false)]; + tensor var_1827_cast_fp16 = concat(axis = var_38, interleave = var_1827_interleave_0, values = (var_1825_cast_fp16, x1_43_cast_fp16))[name = string("op_1827_cast_fp16")]; + tensor var_1828_cast_fp16 = mul(x = var_1827_cast_fp16, y = sin_89)[name = string("op_1828_cast_fp16")]; + tensor var_1829_cast_fp16 = add(x = var_1814_cast_fp16, y = var_1828_cast_fp16)[name = string("op_1829_cast_fp16")]; + bool query_states_11_interleave_0 = const()[name = string("query_states_11_interleave_0"), val = bool(false)]; + tensor query_states_11_cast_fp16 = concat(axis = var_38, interleave = query_states_11_interleave_0, values = (var_1811_cast_fp16, var_1829_cast_fp16))[name = string("query_states_11_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131773696)))]; + tensor linear_37_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_5_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_70_cast_fp16)[name = string("linear_37_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.36p+4)]; + fp16 model_vision_tower_encoder_layers_5_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.34p+4)]; + tensor clip_73_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_37_cast_fp16)[name = string("clip_73_cast_fp16")]; + tensor var_1842 = const()[name = string("op_1842"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_307_cast_fp16 = reshape(shape = var_1842, x = clip_73_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; + fp16 var_33_promoted_37_to_fp16 = const()[name = string("op_33_promoted_37_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1846_cast_fp16 = pow(x = hidden_states_307_cast_fp16, y = var_33_promoted_37_to_fp16)[name = string("op_1846_cast_fp16")]; + tensor var_1848_axes_0 = const()[name = string("op_1848_axes_0"), val = tensor([-1])]; + bool var_1848_keep_dims_0 = const()[name = string("op_1848_keep_dims_0"), val = bool(true)]; + tensor var_1848_cast_fp16 = reduce_mean(axes = var_1848_axes_0, keep_dims = var_1848_keep_dims_0, x = var_1846_cast_fp16)[name = string("op_1848_cast_fp16")]; + fp16 var_1849_to_fp16 = const()[name = string("op_1849_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_75_cast_fp16 = add(x = var_1848_cast_fp16, y = var_1849_to_fp16)[name = string("mean_squared_75_cast_fp16")]; + tensor var_1851_cast_fp16 = pow(x = mean_squared_75_cast_fp16, y = 
var_27_to_fp16)[name = string("op_1851_cast_fp16")]; + tensor normed_output_139_cast_fp16 = mul(x = hidden_states_307_cast_fp16, y = var_1851_cast_fp16)[name = string("normed_output_139_cast_fp16")]; + tensor const_138_to_fp16 = const()[name = string("const_138_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132953408)))]; + tensor normed_output_141_cast_fp16 = mul(x = normed_output_139_cast_fp16, y = const_138_to_fp16)[name = string("normed_output_141_cast_fp16")]; + tensor var_1871 = const()[name = string("op_1871"), val = tensor([32, 32])]; + int32 var_1872_axis_0 = const()[name = string("op_1872_axis_0"), val = int32(-1)]; + tensor var_1872_cast_fp16_0, tensor var_1872_cast_fp16_1 = split(axis = var_1872_axis_0, split_sizes = var_1871, x = normed_output_141_cast_fp16)[name = string("op_1872_cast_fp16")]; + tensor var_1875 = const()[name = string("op_1875"), val = tensor([32, 32])]; + int32 var_1876_axis_0 = const()[name = string("op_1876_axis_0"), val = int32(-1)]; + tensor var_1876_0, tensor var_1876_1 = split(axis = var_1876_axis_0, split_sizes = var_1875, x = var_160_cast_fp16)[name = string("op_1876")]; + tensor var_1879 = const()[name = string("op_1879"), val = tensor([32, 32])]; + int32 var_1880_axis_0 = const()[name = string("op_1880_axis_0"), val = int32(-1)]; + tensor var_1880_0, tensor var_1880_1 = split(axis = var_1880_axis_0, split_sizes = var_1879, x = var_163_cast_fp16)[name = string("op_1880")]; + tensor cos_93_axes_0 = const()[name = string("cos_93_axes_0"), val = tensor([2])]; + tensor cos_93 = expand_dims(axes = cos_93_axes_0, x = var_1876_0)[name = string("cos_93")]; + tensor sin_93_axes_0 = const()[name = string("sin_93_axes_0"), val = tensor([2])]; + tensor sin_93 = expand_dims(axes = sin_93_axes_0, x = var_1880_0)[name = string("sin_93")]; + tensor var_1885_cast_fp16 = mul(x = var_1872_cast_fp16_0, y = cos_93)[name = string("op_1885_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = 
string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_1872_cast_fp16_0)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_1872_cast_fp16_0)[name = string("x2_45_cast_fp16")]; + fp16 const_143_promoted_to_fp16 = const()[name = string("const_143_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1896_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_143_promoted_to_fp16)[name = string("op_1896_cast_fp16")]; + bool var_1898_interleave_0 = const()[name = string("op_1898_interleave_0"), val = bool(false)]; + tensor var_1898_cast_fp16 = concat(axis = var_38, interleave = var_1898_interleave_0, values = (var_1896_cast_fp16, x1_45_cast_fp16))[name = string("op_1898_cast_fp16")]; + tensor var_1899_cast_fp16 = mul(x = var_1898_cast_fp16, y = sin_93)[name = string("op_1899_cast_fp16")]; + tensor var_1900_cast_fp16 = add(x = var_1885_cast_fp16, y = var_1899_cast_fp16)[name = string("op_1900_cast_fp16")]; + tensor cos_97_axes_0 = const()[name = string("cos_97_axes_0"), val = tensor([2])]; + tensor cos_97 = expand_dims(axes = cos_97_axes_0, x = var_1876_1)[name = string("cos_97")]; + tensor sin_97_axes_0 = const()[name = string("sin_97_axes_0"), val = tensor([2])]; + tensor sin_97 = expand_dims(axes = sin_97_axes_0, x = var_1880_1)[name = 
string("sin_97")]; + tensor var_1903_cast_fp16 = mul(x = var_1872_cast_fp16_1, y = cos_97)[name = string("op_1903_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_1872_cast_fp16_1)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_1872_cast_fp16_1)[name = string("x2_47_cast_fp16")]; + fp16 const_146_promoted_to_fp16 = const()[name = string("const_146_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1914_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_146_promoted_to_fp16)[name = string("op_1914_cast_fp16")]; + bool var_1916_interleave_0 = const()[name = string("op_1916_interleave_0"), val = bool(false)]; + tensor var_1916_cast_fp16 = concat(axis = var_38, interleave = var_1916_interleave_0, values = (var_1914_cast_fp16, x1_47_cast_fp16))[name = string("op_1916_cast_fp16")]; + tensor var_1917_cast_fp16 = mul(x = var_1916_cast_fp16, y = sin_97)[name = string("op_1917_cast_fp16")]; + tensor var_1918_cast_fp16 = add(x = var_1903_cast_fp16, y = var_1917_cast_fp16)[name = string("op_1918_cast_fp16")]; + bool key_states_11_interleave_0 = const()[name = string("key_states_11_interleave_0"), val = bool(false)]; + tensor key_states_11_cast_fp16 = concat(axis = var_38, interleave = 
key_states_11_interleave_0, values = (var_1900_cast_fp16, var_1918_cast_fp16))[name = string("key_states_11_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132953600)))]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_5_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_70_cast_fp16)[name = string("linear_38_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.36p+4)]; + fp16 model_vision_tower_encoder_layers_5_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.34p+4)]; + tensor clip_75_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_38_cast_fp16)[name = string("clip_75_cast_fp16")]; + tensor var_1931 = const()[name = string("op_1931"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_313_cast_fp16 = reshape(shape = var_1931, x = clip_75_cast_fp16)[name = string("hidden_states_313_cast_fp16")]; + fp16 var_33_promoted_38_to_fp16 = const()[name = string("op_33_promoted_38_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1934_cast_fp16 = pow(x = hidden_states_313_cast_fp16, y = var_33_promoted_38_to_fp16)[name = string("op_1934_cast_fp16")]; + tensor var_1936_axes_0 = const()[name = string("op_1936_axes_0"), val = tensor([-1])]; + bool var_1936_keep_dims_0 = const()[name = string("op_1936_keep_dims_0"), 
val = bool(true)]; + tensor var_1936_cast_fp16 = reduce_mean(axes = var_1936_axes_0, keep_dims = var_1936_keep_dims_0, x = var_1934_cast_fp16)[name = string("op_1936_cast_fp16")]; + fp16 var_1937_to_fp16 = const()[name = string("op_1937_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_77_cast_fp16 = add(x = var_1936_cast_fp16, y = var_1937_to_fp16)[name = string("mean_squared_77_cast_fp16")]; + tensor var_1939_cast_fp16 = pow(x = mean_squared_77_cast_fp16, y = var_27_to_fp16)[name = string("op_1939_cast_fp16")]; + tensor normed_output_143_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = var_1939_cast_fp16)[name = string("normed_output_143_cast_fp16")]; + tensor hidden_states_319_perm_0 = const()[name = string("hidden_states_319_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(true)]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = key_states_11_cast_fp16)[name = string("transpose_137")]; + tensor transpose_74 = transpose(perm = transpose_74_perm_0, x = query_states_11_cast_fp16)[name = string("transpose_138")]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("matmul_5_cast_fp16")]; + tensor add_5_cast_fp16 = add(x = matmul_5_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_5_cast_fp16")]; + int32 softmax_5_axis_0 = const()[name = string("softmax_5_axis_0"), val = int32(-1)]; + tensor softmax_5_cast_fp16 = softmax(axis = softmax_5_axis_0, x = add_5_cast_fp16)[name = string("softmax_5_cast_fp16")]; + bool 
attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; + bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; + tensor hidden_states_319_cast_fp16 = transpose(perm = hidden_states_319_perm_0, x = normed_output_143_cast_fp16)[name = string("transpose_139")]; + tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = softmax_5_cast_fp16, y = hidden_states_319_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1944_perm_0 = const()[name = string("op_1944_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1946 = const()[name = string("op_1946"), val = tensor([1, 2304, -1])]; + tensor var_1944_cast_fp16 = transpose(perm = var_1944_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_136")]; + tensor var_1947_cast_fp16 = reshape(shape = var_1946, x = var_1944_cast_fp16)[name = string("op_1947_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.6cp+1)]; + fp16 model_vision_tower_encoder_layers_5_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.6ap+1)]; + tensor clip_76_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_self_attn_o_proj_input_max_promoted_to_fp16, x = var_1947_cast_fp16)[name = string("clip_76_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(134133312)))]; + tensor linear_39_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_5_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_76_cast_fp16)[name = string("linear_39_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.32p+2)]; + fp16 model_vision_tower_encoder_layers_5_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3p+2)]; + tensor clip_77_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_39_cast_fp16)[name = string("clip_77_cast_fp16")]; + fp16 var_33_promoted_39_to_fp16 = const()[name = string("op_33_promoted_39_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1960_cast_fp16 = pow(x = clip_77_cast_fp16, y = var_33_promoted_39_to_fp16)[name = string("op_1960_cast_fp16")]; + tensor var_1962_axes_0 = const()[name = string("op_1962_axes_0"), val = tensor([-1])]; + bool var_1962_keep_dims_0 = const()[name = string("op_1962_keep_dims_0"), val = bool(true)]; + tensor var_1962_cast_fp16 = reduce_mean(axes = var_1962_axes_0, keep_dims = var_1962_keep_dims_0, x = var_1960_cast_fp16)[name = string("op_1962_cast_fp16")]; + fp16 var_1963_to_fp16 = const()[name = string("op_1963_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_79_cast_fp16 = add(x = var_1962_cast_fp16, y = var_1963_to_fp16)[name = string("mean_squared_79_cast_fp16")]; + tensor var_1965_cast_fp16 = pow(x = mean_squared_79_cast_fp16, y = var_27_to_fp16)[name = string("op_1965_cast_fp16")]; + tensor normed_output_145_cast_fp16 = mul(x = clip_77_cast_fp16, 
y = var_1965_cast_fp16)[name = string("normed_output_145_cast_fp16")]; + tensor const_147_to_fp16 = const()[name = string("const_147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135313024)))]; + tensor normed_output_147_cast_fp16 = mul(x = normed_output_145_cast_fp16, y = const_147_to_fp16)[name = string("normed_output_147_cast_fp16")]; + tensor hidden_states_331_cast_fp16 = add(x = hidden_states_293_cast_fp16, y = normed_output_147_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; + fp16 var_33_promoted_40_to_fp16 = const()[name = string("op_33_promoted_40_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1973_cast_fp16 = pow(x = hidden_states_331_cast_fp16, y = var_33_promoted_40_to_fp16)[name = string("op_1973_cast_fp16")]; + tensor var_1975_axes_0 = const()[name = string("op_1975_axes_0"), val = tensor([-1])]; + bool var_1975_keep_dims_0 = const()[name = string("op_1975_keep_dims_0"), val = bool(true)]; + tensor var_1975_cast_fp16 = reduce_mean(axes = var_1975_axes_0, keep_dims = var_1975_keep_dims_0, x = var_1973_cast_fp16)[name = string("op_1975_cast_fp16")]; + fp16 var_1976_to_fp16 = const()[name = string("op_1976_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_81_cast_fp16 = add(x = var_1975_cast_fp16, y = var_1976_to_fp16)[name = string("mean_squared_81_cast_fp16")]; + tensor var_1978_cast_fp16 = pow(x = mean_squared_81_cast_fp16, y = var_27_to_fp16)[name = string("op_1978_cast_fp16")]; + tensor normed_output_149_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = var_1978_cast_fp16)[name = string("normed_output_149_cast_fp16")]; + tensor const_148_to_fp16 = const()[name = string("const_148_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135314624)))]; + tensor normed_output_151_cast_fp16 = mul(x = normed_output_149_cast_fp16, y = const_148_to_fp16)[name = string("normed_output_151_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_5_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.3cp+3)]; + fp16 model_vision_tower_encoder_layers_5_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.3ap+3)]; + tensor clip_78_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_151_cast_fp16)[name = string("clip_78_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135316224)))]; + tensor linear_40_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_5_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_78_cast_fp16)[name = string("linear_40_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.84p+3)]; + fp16 model_vision_tower_encoder_layers_5_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.82p+3)]; + tensor clip_79_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_40_cast_fp16)[name = string("clip_79_cast_fp16")]; + string var_1995_mode_0 = const()[name = string("op_1995_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor var_1995_cast_fp16 = gelu(mode = var_1995_mode_0, x = clip_79_cast_fp16)[name = string("op_1995_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140034880)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_5_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_78_cast_fp16)[name = string("linear_41_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.84p+3)]; + fp16 model_vision_tower_encoder_layers_5_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.82p+3)]; + tensor clip_81_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_mlp_up_proj_output_max_promoted_to_fp16, x = linear_41_cast_fp16)[name = string("clip_81_cast_fp16")]; + tensor hidden_states_341_cast_fp16 = mul(x = var_1995_cast_fp16, y = clip_81_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.82p+5)]; + fp16 model_vision_tower_encoder_layers_5_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.7ep+5)]; + tensor clip_82_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_5_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_341_cast_fp16)[name = string("clip_82_cast_fp16")]; + tensor model_vision_tower_encoder_layers_5_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144753536)))]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_5_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_82_cast_fp16)[name = string("linear_42_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_5_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.f2p+4)]; + fp16 model_vision_tower_encoder_layers_5_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_5_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.eep+4)]; + tensor clip_83_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_5_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_5_mlp_down_proj_output_max_promoted_to_fp16, x = linear_42_cast_fp16)[name = string("clip_83_cast_fp16")]; + fp16 var_33_promoted_41_to_fp16 = const()[name = string("op_33_promoted_41_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2017_cast_fp16 = pow(x = clip_83_cast_fp16, y = var_33_promoted_41_to_fp16)[name = string("op_2017_cast_fp16")]; + tensor var_2019_axes_0 = const()[name = string("op_2019_axes_0"), val = tensor([-1])]; + bool var_2019_keep_dims_0 = const()[name = string("op_2019_keep_dims_0"), val = bool(true)]; + tensor var_2019_cast_fp16 = reduce_mean(axes = var_2019_axes_0, keep_dims = var_2019_keep_dims_0, x = var_2017_cast_fp16)[name = 
string("op_2019_cast_fp16")]; + fp16 var_2020_to_fp16 = const()[name = string("op_2020_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_83_cast_fp16 = add(x = var_2019_cast_fp16, y = var_2020_to_fp16)[name = string("mean_squared_83_cast_fp16")]; + tensor var_2022_cast_fp16 = pow(x = mean_squared_83_cast_fp16, y = var_27_to_fp16)[name = string("op_2022_cast_fp16")]; + tensor normed_output_153_cast_fp16 = mul(x = clip_83_cast_fp16, y = var_2022_cast_fp16)[name = string("normed_output_153_cast_fp16")]; + tensor const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149472192)))]; + tensor normed_output_155_cast_fp16 = mul(x = normed_output_153_cast_fp16, y = const_149_to_fp16)[name = string("normed_output_155_cast_fp16")]; + tensor hidden_states_351_cast_fp16 = add(x = hidden_states_331_cast_fp16, y = normed_output_155_cast_fp16)[name = string("hidden_states_351_cast_fp16")]; + fp16 var_33_promoted_42_to_fp16 = const()[name = string("op_33_promoted_42_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2036_cast_fp16 = pow(x = hidden_states_351_cast_fp16, y = var_33_promoted_42_to_fp16)[name = string("op_2036_cast_fp16")]; + tensor var_2038_axes_0 = const()[name = string("op_2038_axes_0"), val = tensor([-1])]; + bool var_2038_keep_dims_0 = const()[name = string("op_2038_keep_dims_0"), val = bool(true)]; + tensor var_2038_cast_fp16 = reduce_mean(axes = var_2038_axes_0, keep_dims = var_2038_keep_dims_0, x = var_2036_cast_fp16)[name = string("op_2038_cast_fp16")]; + fp16 var_2039_to_fp16 = const()[name = string("op_2039_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_85_cast_fp16 = add(x = var_2038_cast_fp16, y = var_2039_to_fp16)[name = string("mean_squared_85_cast_fp16")]; + tensor var_2041_cast_fp16 = pow(x = mean_squared_85_cast_fp16, y = var_27_to_fp16)[name = string("op_2041_cast_fp16")]; + tensor normed_output_157_cast_fp16 = mul(x = 
hidden_states_351_cast_fp16, y = var_2041_cast_fp16)[name = string("normed_output_157_cast_fp16")]; + tensor const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149473792)))]; + tensor normed_output_159_cast_fp16 = mul(x = normed_output_157_cast_fp16, y = const_150_to_fp16)[name = string("normed_output_159_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.42p+3)]; + fp16 model_vision_tower_encoder_layers_6_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.4p+3)]; + tensor clip_84_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_159_cast_fp16)[name = string("clip_84_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149475392)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_6_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_84_cast_fp16)[name = string("linear_43_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.ccp+3)]; + fp16 model_vision_tower_encoder_layers_6_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_6_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.cap+3)]; + tensor clip_85_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_43_cast_fp16)[name = string("clip_85_cast_fp16")]; + tensor var_2063 = const()[name = string("op_2063"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_359_cast_fp16 = reshape(shape = var_2063, x = clip_85_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; + fp16 var_33_promoted_43_to_fp16 = const()[name = string("op_33_promoted_43_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2067_cast_fp16 = pow(x = hidden_states_359_cast_fp16, y = var_33_promoted_43_to_fp16)[name = string("op_2067_cast_fp16")]; + tensor var_2069_axes_0 = const()[name = string("op_2069_axes_0"), val = tensor([-1])]; + bool var_2069_keep_dims_0 = const()[name = string("op_2069_keep_dims_0"), val = bool(true)]; + tensor var_2069_cast_fp16 = reduce_mean(axes = var_2069_axes_0, keep_dims = var_2069_keep_dims_0, x = var_2067_cast_fp16)[name = string("op_2069_cast_fp16")]; + fp16 var_2070_to_fp16 = const()[name = string("op_2070_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_87_cast_fp16 = add(x = var_2069_cast_fp16, y = var_2070_to_fp16)[name = string("mean_squared_87_cast_fp16")]; + tensor var_2072_cast_fp16 = pow(x = mean_squared_87_cast_fp16, y = var_27_to_fp16)[name = string("op_2072_cast_fp16")]; + tensor normed_output_161_cast_fp16 = mul(x = hidden_states_359_cast_fp16, y = var_2072_cast_fp16)[name = string("normed_output_161_cast_fp16")]; + tensor const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150655104)))]; + tensor normed_output_163_cast_fp16 = mul(x = normed_output_161_cast_fp16, y = const_153_to_fp16)[name = 
string("normed_output_163_cast_fp16")]; + tensor var_2092 = const()[name = string("op_2092"), val = tensor([32, 32])]; + int32 var_2093_axis_0 = const()[name = string("op_2093_axis_0"), val = int32(-1)]; + tensor var_2093_cast_fp16_0, tensor var_2093_cast_fp16_1 = split(axis = var_2093_axis_0, split_sizes = var_2092, x = normed_output_163_cast_fp16)[name = string("op_2093_cast_fp16")]; + tensor var_2096 = const()[name = string("op_2096"), val = tensor([32, 32])]; + int32 var_2097_axis_0 = const()[name = string("op_2097_axis_0"), val = int32(-1)]; + tensor var_2097_0, tensor var_2097_1 = split(axis = var_2097_axis_0, split_sizes = var_2096, x = var_160_cast_fp16)[name = string("op_2097")]; + tensor var_2100 = const()[name = string("op_2100"), val = tensor([32, 32])]; + int32 var_2101_axis_0 = const()[name = string("op_2101_axis_0"), val = int32(-1)]; + tensor var_2101_0, tensor var_2101_1 = split(axis = var_2101_axis_0, split_sizes = var_2100, x = var_163_cast_fp16)[name = string("op_2101")]; + tensor cos_101_axes_0 = const()[name = string("cos_101_axes_0"), val = tensor([2])]; + tensor cos_101 = expand_dims(axes = cos_101_axes_0, x = var_2097_0)[name = string("cos_101")]; + tensor sin_101_axes_0 = const()[name = string("sin_101_axes_0"), val = tensor([2])]; + tensor sin_101 = expand_dims(axes = sin_101_axes_0, x = var_2101_0)[name = string("sin_101")]; + tensor var_2106_cast_fp16 = mul(x = var_2093_cast_fp16_0, y = cos_101)[name = string("op_2106_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_2093_cast_fp16_0)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name 
= string("x2_49_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_2093_cast_fp16_0)[name = string("x2_49_cast_fp16")]; + fp16 const_158_promoted_to_fp16 = const()[name = string("const_158_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2117_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_158_promoted_to_fp16)[name = string("op_2117_cast_fp16")]; + bool var_2119_interleave_0 = const()[name = string("op_2119_interleave_0"), val = bool(false)]; + tensor var_2119_cast_fp16 = concat(axis = var_38, interleave = var_2119_interleave_0, values = (var_2117_cast_fp16, x1_49_cast_fp16))[name = string("op_2119_cast_fp16")]; + tensor var_2120_cast_fp16 = mul(x = var_2119_cast_fp16, y = sin_101)[name = string("op_2120_cast_fp16")]; + tensor var_2121_cast_fp16 = add(x = var_2106_cast_fp16, y = var_2120_cast_fp16)[name = string("op_2121_cast_fp16")]; + tensor cos_105_axes_0 = const()[name = string("cos_105_axes_0"), val = tensor([2])]; + tensor cos_105 = expand_dims(axes = cos_105_axes_0, x = var_2097_1)[name = string("cos_105")]; + tensor sin_105_axes_0 = const()[name = string("sin_105_axes_0"), val = tensor([2])]; + tensor sin_105 = expand_dims(axes = sin_105_axes_0, x = var_2101_1)[name = string("sin_105")]; + tensor var_2124_cast_fp16 = mul(x = var_2093_cast_fp16_1, y = cos_105)[name = string("op_2124_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin 
= x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_2093_cast_fp16_1)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_2093_cast_fp16_1)[name = string("x2_51_cast_fp16")]; + fp16 const_161_promoted_to_fp16 = const()[name = string("const_161_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2135_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_161_promoted_to_fp16)[name = string("op_2135_cast_fp16")]; + bool var_2137_interleave_0 = const()[name = string("op_2137_interleave_0"), val = bool(false)]; + tensor var_2137_cast_fp16 = concat(axis = var_38, interleave = var_2137_interleave_0, values = (var_2135_cast_fp16, x1_51_cast_fp16))[name = string("op_2137_cast_fp16")]; + tensor var_2138_cast_fp16 = mul(x = var_2137_cast_fp16, y = sin_105)[name = string("op_2138_cast_fp16")]; + tensor var_2139_cast_fp16 = add(x = var_2124_cast_fp16, y = var_2138_cast_fp16)[name = string("op_2139_cast_fp16")]; + bool query_states_13_interleave_0 = const()[name = string("query_states_13_interleave_0"), val = bool(false)]; + tensor query_states_13_cast_fp16 = concat(axis = var_38, interleave = query_states_13_interleave_0, values = (var_2121_cast_fp16, var_2139_cast_fp16))[name = string("query_states_13_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150655296)))]; + tensor linear_44_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_6_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_84_cast_fp16)[name = string("linear_44_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.26p+4)]; + fp16 model_vision_tower_encoder_layers_6_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.24p+4)]; + tensor clip_87_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_44_cast_fp16)[name = string("clip_87_cast_fp16")]; + tensor var_2152 = const()[name = string("op_2152"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_365_cast_fp16 = reshape(shape = var_2152, x = clip_87_cast_fp16)[name = string("hidden_states_365_cast_fp16")]; + fp16 var_33_promoted_44_to_fp16 = const()[name = string("op_33_promoted_44_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2156_cast_fp16 = pow(x = hidden_states_365_cast_fp16, y = var_33_promoted_44_to_fp16)[name = string("op_2156_cast_fp16")]; + tensor var_2158_axes_0 = const()[name = string("op_2158_axes_0"), val = tensor([-1])]; + bool var_2158_keep_dims_0 = const()[name = string("op_2158_keep_dims_0"), val = bool(true)]; + tensor var_2158_cast_fp16 = reduce_mean(axes = var_2158_axes_0, keep_dims = var_2158_keep_dims_0, x = var_2156_cast_fp16)[name = string("op_2158_cast_fp16")]; + fp16 var_2159_to_fp16 = const()[name = string("op_2159_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_89_cast_fp16 = add(x = var_2158_cast_fp16, y = var_2159_to_fp16)[name = string("mean_squared_89_cast_fp16")]; + tensor var_2161_cast_fp16 = pow(x = mean_squared_89_cast_fp16, y = 
var_27_to_fp16)[name = string("op_2161_cast_fp16")]; + tensor normed_output_165_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = var_2161_cast_fp16)[name = string("normed_output_165_cast_fp16")]; + tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151835008)))]; + tensor normed_output_167_cast_fp16 = mul(x = normed_output_165_cast_fp16, y = const_162_to_fp16)[name = string("normed_output_167_cast_fp16")]; + tensor var_2181 = const()[name = string("op_2181"), val = tensor([32, 32])]; + int32 var_2182_axis_0 = const()[name = string("op_2182_axis_0"), val = int32(-1)]; + tensor var_2182_cast_fp16_0, tensor var_2182_cast_fp16_1 = split(axis = var_2182_axis_0, split_sizes = var_2181, x = normed_output_167_cast_fp16)[name = string("op_2182_cast_fp16")]; + tensor var_2185 = const()[name = string("op_2185"), val = tensor([32, 32])]; + int32 var_2186_axis_0 = const()[name = string("op_2186_axis_0"), val = int32(-1)]; + tensor var_2186_0, tensor var_2186_1 = split(axis = var_2186_axis_0, split_sizes = var_2185, x = var_160_cast_fp16)[name = string("op_2186")]; + tensor var_2189 = const()[name = string("op_2189"), val = tensor([32, 32])]; + int32 var_2190_axis_0 = const()[name = string("op_2190_axis_0"), val = int32(-1)]; + tensor var_2190_0, tensor var_2190_1 = split(axis = var_2190_axis_0, split_sizes = var_2189, x = var_163_cast_fp16)[name = string("op_2190")]; + tensor cos_109_axes_0 = const()[name = string("cos_109_axes_0"), val = tensor([2])]; + tensor cos_109 = expand_dims(axes = cos_109_axes_0, x = var_2186_0)[name = string("cos_109")]; + tensor sin_109_axes_0 = const()[name = string("sin_109_axes_0"), val = tensor([2])]; + tensor sin_109 = expand_dims(axes = sin_109_axes_0, x = var_2190_0)[name = string("sin_109")]; + tensor var_2195_cast_fp16 = mul(x = var_2182_cast_fp16_0, y = cos_109)[name = string("op_2195_cast_fp16")]; + tensor x1_53_begin_0 = 
const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_2182_cast_fp16_0)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_2182_cast_fp16_0)[name = string("x2_53_cast_fp16")]; + fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2206_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_167_promoted_to_fp16)[name = string("op_2206_cast_fp16")]; + bool var_2208_interleave_0 = const()[name = string("op_2208_interleave_0"), val = bool(false)]; + tensor var_2208_cast_fp16 = concat(axis = var_38, interleave = var_2208_interleave_0, values = (var_2206_cast_fp16, x1_53_cast_fp16))[name = string("op_2208_cast_fp16")]; + tensor var_2209_cast_fp16 = mul(x = var_2208_cast_fp16, y = sin_109)[name = string("op_2209_cast_fp16")]; + tensor var_2210_cast_fp16 = add(x = var_2195_cast_fp16, y = var_2209_cast_fp16)[name = string("op_2210_cast_fp16")]; + tensor cos_113_axes_0 = const()[name = string("cos_113_axes_0"), val = tensor([2])]; + tensor cos_113 = expand_dims(axes = cos_113_axes_0, x = var_2186_1)[name = string("cos_113")]; + tensor sin_113_axes_0 = const()[name = string("sin_113_axes_0"), val = tensor([2])]; + tensor sin_113 = expand_dims(axes = sin_113_axes_0, x = 
var_2190_1)[name = string("sin_113")]; + tensor var_2213_cast_fp16 = mul(x = var_2182_cast_fp16_1, y = cos_113)[name = string("op_2213_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_2182_cast_fp16_1)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_2182_cast_fp16_1)[name = string("x2_55_cast_fp16")]; + fp16 const_170_promoted_to_fp16 = const()[name = string("const_170_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2224_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_170_promoted_to_fp16)[name = string("op_2224_cast_fp16")]; + bool var_2226_interleave_0 = const()[name = string("op_2226_interleave_0"), val = bool(false)]; + tensor var_2226_cast_fp16 = concat(axis = var_38, interleave = var_2226_interleave_0, values = (var_2224_cast_fp16, x1_55_cast_fp16))[name = string("op_2226_cast_fp16")]; + tensor var_2227_cast_fp16 = mul(x = var_2226_cast_fp16, y = sin_113)[name = string("op_2227_cast_fp16")]; + tensor var_2228_cast_fp16 = add(x = var_2213_cast_fp16, y = var_2227_cast_fp16)[name = string("op_2228_cast_fp16")]; + bool key_states_13_interleave_0 = const()[name = string("key_states_13_interleave_0"), val = bool(false)]; + tensor key_states_13_cast_fp16 = concat(axis = var_38, 
interleave = key_states_13_interleave_0, values = (var_2210_cast_fp16, var_2228_cast_fp16))[name = string("key_states_13_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151835200)))]; + tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_6_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_84_cast_fp16)[name = string("linear_45_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.26p+4)]; + fp16 model_vision_tower_encoder_layers_6_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.24p+4)]; + tensor clip_89_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_45_cast_fp16)[name = string("clip_89_cast_fp16")]; + tensor var_2241 = const()[name = string("op_2241"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_371_cast_fp16 = reshape(shape = var_2241, x = clip_89_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; + fp16 var_33_promoted_45_to_fp16 = const()[name = string("op_33_promoted_45_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2244_cast_fp16 = pow(x = hidden_states_371_cast_fp16, y = var_33_promoted_45_to_fp16)[name = string("op_2244_cast_fp16")]; + tensor var_2246_axes_0 = const()[name = string("op_2246_axes_0"), val = tensor([-1])]; + bool var_2246_keep_dims_0 = const()[name = 
string("op_2246_keep_dims_0"), val = bool(true)]; + tensor var_2246_cast_fp16 = reduce_mean(axes = var_2246_axes_0, keep_dims = var_2246_keep_dims_0, x = var_2244_cast_fp16)[name = string("op_2246_cast_fp16")]; + fp16 var_2247_to_fp16 = const()[name = string("op_2247_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_91_cast_fp16 = add(x = var_2246_cast_fp16, y = var_2247_to_fp16)[name = string("mean_squared_91_cast_fp16")]; + tensor var_2249_cast_fp16 = pow(x = mean_squared_91_cast_fp16, y = var_27_to_fp16)[name = string("op_2249_cast_fp16")]; + tensor normed_output_169_cast_fp16 = mul(x = hidden_states_371_cast_fp16, y = var_2249_cast_fp16)[name = string("normed_output_169_cast_fp16")]; + tensor hidden_states_377_perm_0 = const()[name = string("hidden_states_377_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(true)]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_77 = transpose(perm = transpose_77_perm_0, x = key_states_13_cast_fp16)[name = string("transpose_133")]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = query_states_13_cast_fp16)[name = string("transpose_134")]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("matmul_6_cast_fp16")]; + tensor add_6_cast_fp16 = add(x = matmul_6_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_6_cast_fp16")]; + int32 softmax_6_axis_0 = const()[name = string("softmax_6_axis_0"), val = int32(-1)]; + tensor softmax_6_cast_fp16 = softmax(axis = softmax_6_axis_0, x = add_6_cast_fp16)[name = 
string("softmax_6_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor hidden_states_377_cast_fp16 = transpose(perm = hidden_states_377_perm_0, x = normed_output_169_cast_fp16)[name = string("transpose_135")]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = softmax_6_cast_fp16, y = hidden_states_377_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2254_perm_0 = const()[name = string("op_2254_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2256 = const()[name = string("op_2256"), val = tensor([1, 2304, -1])]; + tensor var_2254_cast_fp16 = transpose(perm = var_2254_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_132")]; + tensor var_2257_cast_fp16 = reshape(shape = var_2256, x = var_2254_cast_fp16)[name = string("op_2257_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.2ap+1)]; + fp16 model_vision_tower_encoder_layers_6_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.28p+1)]; + tensor clip_90_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_self_attn_o_proj_input_max_promoted_to_fp16, x = var_2257_cast_fp16)[name = string("clip_90_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153014912)))]; + tensor linear_46_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_6_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_90_cast_fp16)[name = string("linear_46_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.06p+2)]; + fp16 model_vision_tower_encoder_layers_6_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.04p+2)]; + tensor clip_91_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_46_cast_fp16)[name = string("clip_91_cast_fp16")]; + fp16 var_33_promoted_46_to_fp16 = const()[name = string("op_33_promoted_46_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2270_cast_fp16 = pow(x = clip_91_cast_fp16, y = var_33_promoted_46_to_fp16)[name = string("op_2270_cast_fp16")]; + tensor var_2272_axes_0 = const()[name = string("op_2272_axes_0"), val = tensor([-1])]; + bool var_2272_keep_dims_0 = const()[name = string("op_2272_keep_dims_0"), val = bool(true)]; + tensor var_2272_cast_fp16 = reduce_mean(axes = var_2272_axes_0, keep_dims = var_2272_keep_dims_0, x = var_2270_cast_fp16)[name = string("op_2272_cast_fp16")]; + fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_93_cast_fp16 = add(x = var_2272_cast_fp16, y = var_2273_to_fp16)[name = string("mean_squared_93_cast_fp16")]; + tensor var_2275_cast_fp16 = pow(x = mean_squared_93_cast_fp16, y = var_27_to_fp16)[name = string("op_2275_cast_fp16")]; + tensor normed_output_171_cast_fp16 = 
mul(x = clip_91_cast_fp16, y = var_2275_cast_fp16)[name = string("normed_output_171_cast_fp16")]; + tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154194624)))]; + tensor normed_output_173_cast_fp16 = mul(x = normed_output_171_cast_fp16, y = const_171_to_fp16)[name = string("normed_output_173_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_351_cast_fp16, y = normed_output_173_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + fp16 var_33_promoted_47_to_fp16 = const()[name = string("op_33_promoted_47_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2283_cast_fp16 = pow(x = hidden_states_389_cast_fp16, y = var_33_promoted_47_to_fp16)[name = string("op_2283_cast_fp16")]; + tensor var_2285_axes_0 = const()[name = string("op_2285_axes_0"), val = tensor([-1])]; + bool var_2285_keep_dims_0 = const()[name = string("op_2285_keep_dims_0"), val = bool(true)]; + tensor var_2285_cast_fp16 = reduce_mean(axes = var_2285_axes_0, keep_dims = var_2285_keep_dims_0, x = var_2283_cast_fp16)[name = string("op_2285_cast_fp16")]; + fp16 var_2286_to_fp16 = const()[name = string("op_2286_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_95_cast_fp16 = add(x = var_2285_cast_fp16, y = var_2286_to_fp16)[name = string("mean_squared_95_cast_fp16")]; + tensor var_2288_cast_fp16 = pow(x = mean_squared_95_cast_fp16, y = var_27_to_fp16)[name = string("op_2288_cast_fp16")]; + tensor normed_output_175_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = var_2288_cast_fp16)[name = string("normed_output_175_cast_fp16")]; + tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154196224)))]; + tensor normed_output_177_cast_fp16 = mul(x = normed_output_175_cast_fp16, y = const_172_to_fp16)[name = string("normed_output_177_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_6_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.86p+3)]; + fp16 model_vision_tower_encoder_layers_6_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.82p+3)]; + tensor clip_92_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_177_cast_fp16)[name = string("clip_92_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154197824)))]; + tensor linear_47_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_6_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_92_cast_fp16)[name = string("linear_47_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e6p+3)]; + fp16 model_vision_tower_encoder_layers_6_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.e2p+3)]; + tensor clip_93_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_47_cast_fp16)[name = string("clip_93_cast_fp16")]; + string var_2305_mode_0 = const()[name = string("op_2305_mode_0"), val = 
string("TANH_APPROXIMATION")]; + tensor var_2305_cast_fp16 = gelu(mode = var_2305_mode_0, x = clip_93_cast_fp16)[name = string("op_2305_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158916480)))]; + tensor linear_48_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_6_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_92_cast_fp16)[name = string("linear_48_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e6p+3)]; + fp16 model_vision_tower_encoder_layers_6_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.e2p+3)]; + tensor clip_95_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_mlp_up_proj_output_max_promoted_to_fp16, x = linear_48_cast_fp16)[name = string("clip_95_cast_fp16")]; + tensor hidden_states_399_cast_fp16 = mul(x = var_2305_cast_fp16, y = clip_95_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.1ep+6)]; + fp16 model_vision_tower_encoder_layers_6_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.1cp+6)]; + tensor clip_96_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_6_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_399_cast_fp16)[name = string("clip_96_cast_fp16")]; + tensor model_vision_tower_encoder_layers_6_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163635136)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_6_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_96_cast_fp16)[name = string("linear_49_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_6_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.34p+5)]; + fp16 model_vision_tower_encoder_layers_6_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_6_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3p+5)]; + tensor clip_97_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_6_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_6_mlp_down_proj_output_max_promoted_to_fp16, x = linear_49_cast_fp16)[name = string("clip_97_cast_fp16")]; + fp16 var_33_promoted_48_to_fp16 = const()[name = string("op_33_promoted_48_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2327_cast_fp16 = pow(x = clip_97_cast_fp16, y = var_33_promoted_48_to_fp16)[name = string("op_2327_cast_fp16")]; + tensor var_2329_axes_0 = const()[name = string("op_2329_axes_0"), val = tensor([-1])]; + bool var_2329_keep_dims_0 = const()[name = string("op_2329_keep_dims_0"), val = bool(true)]; + tensor var_2329_cast_fp16 = reduce_mean(axes = var_2329_axes_0, keep_dims = var_2329_keep_dims_0, x = var_2327_cast_fp16)[name = 
string("op_2329_cast_fp16")]; + fp16 var_2330_to_fp16 = const()[name = string("op_2330_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_97_cast_fp16 = add(x = var_2329_cast_fp16, y = var_2330_to_fp16)[name = string("mean_squared_97_cast_fp16")]; + tensor var_2332_cast_fp16 = pow(x = mean_squared_97_cast_fp16, y = var_27_to_fp16)[name = string("op_2332_cast_fp16")]; + tensor normed_output_179_cast_fp16 = mul(x = clip_97_cast_fp16, y = var_2332_cast_fp16)[name = string("normed_output_179_cast_fp16")]; + tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168353792)))]; + tensor normed_output_181_cast_fp16 = mul(x = normed_output_179_cast_fp16, y = const_173_to_fp16)[name = string("normed_output_181_cast_fp16")]; + tensor hidden_states_409_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = normed_output_181_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; + fp16 var_33_promoted_49_to_fp16 = const()[name = string("op_33_promoted_49_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2346_cast_fp16 = pow(x = hidden_states_409_cast_fp16, y = var_33_promoted_49_to_fp16)[name = string("op_2346_cast_fp16")]; + tensor var_2348_axes_0 = const()[name = string("op_2348_axes_0"), val = tensor([-1])]; + bool var_2348_keep_dims_0 = const()[name = string("op_2348_keep_dims_0"), val = bool(true)]; + tensor var_2348_cast_fp16 = reduce_mean(axes = var_2348_axes_0, keep_dims = var_2348_keep_dims_0, x = var_2346_cast_fp16)[name = string("op_2348_cast_fp16")]; + fp16 var_2349_to_fp16 = const()[name = string("op_2349_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_99_cast_fp16 = add(x = var_2348_cast_fp16, y = var_2349_to_fp16)[name = string("mean_squared_99_cast_fp16")]; + tensor var_2351_cast_fp16 = pow(x = mean_squared_99_cast_fp16, y = var_27_to_fp16)[name = string("op_2351_cast_fp16")]; + tensor normed_output_183_cast_fp16 = mul(x = 
hidden_states_409_cast_fp16, y = var_2351_cast_fp16)[name = string("normed_output_183_cast_fp16")]; + tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168355392)))]; + tensor normed_output_185_cast_fp16 = mul(x = normed_output_183_cast_fp16, y = const_174_to_fp16)[name = string("normed_output_185_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.26p+3)]; + fp16 model_vision_tower_encoder_layers_7_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.24p+3)]; + tensor clip_98_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_185_cast_fp16)[name = string("clip_98_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168356992)))]; + tensor linear_50_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_7_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_98_cast_fp16)[name = string("linear_50_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.92p+3)]; + fp16 model_vision_tower_encoder_layers_7_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_7_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.8ep+3)]; + tensor clip_99_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_50_cast_fp16)[name = string("clip_99_cast_fp16")]; + tensor var_2373 = const()[name = string("op_2373"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_417_cast_fp16 = reshape(shape = var_2373, x = clip_99_cast_fp16)[name = string("hidden_states_417_cast_fp16")]; + fp16 var_33_promoted_50_to_fp16 = const()[name = string("op_33_promoted_50_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2377_cast_fp16 = pow(x = hidden_states_417_cast_fp16, y = var_33_promoted_50_to_fp16)[name = string("op_2377_cast_fp16")]; + tensor var_2379_axes_0 = const()[name = string("op_2379_axes_0"), val = tensor([-1])]; + bool var_2379_keep_dims_0 = const()[name = string("op_2379_keep_dims_0"), val = bool(true)]; + tensor var_2379_cast_fp16 = reduce_mean(axes = var_2379_axes_0, keep_dims = var_2379_keep_dims_0, x = var_2377_cast_fp16)[name = string("op_2379_cast_fp16")]; + fp16 var_2380_to_fp16 = const()[name = string("op_2380_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_101_cast_fp16 = add(x = var_2379_cast_fp16, y = var_2380_to_fp16)[name = string("mean_squared_101_cast_fp16")]; + tensor var_2382_cast_fp16 = pow(x = mean_squared_101_cast_fp16, y = var_27_to_fp16)[name = string("op_2382_cast_fp16")]; + tensor normed_output_187_cast_fp16 = mul(x = hidden_states_417_cast_fp16, y = var_2382_cast_fp16)[name = string("normed_output_187_cast_fp16")]; + tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169536704)))]; + tensor normed_output_189_cast_fp16 = mul(x = normed_output_187_cast_fp16, y = const_177_to_fp16)[name = 
string("normed_output_189_cast_fp16")]; + tensor var_2402 = const()[name = string("op_2402"), val = tensor([32, 32])]; + int32 var_2403_axis_0 = const()[name = string("op_2403_axis_0"), val = int32(-1)]; + tensor var_2403_cast_fp16_0, tensor var_2403_cast_fp16_1 = split(axis = var_2403_axis_0, split_sizes = var_2402, x = normed_output_189_cast_fp16)[name = string("op_2403_cast_fp16")]; + tensor var_2406 = const()[name = string("op_2406"), val = tensor([32, 32])]; + int32 var_2407_axis_0 = const()[name = string("op_2407_axis_0"), val = int32(-1)]; + tensor var_2407_0, tensor var_2407_1 = split(axis = var_2407_axis_0, split_sizes = var_2406, x = var_160_cast_fp16)[name = string("op_2407")]; + tensor var_2410 = const()[name = string("op_2410"), val = tensor([32, 32])]; + int32 var_2411_axis_0 = const()[name = string("op_2411_axis_0"), val = int32(-1)]; + tensor var_2411_0, tensor var_2411_1 = split(axis = var_2411_axis_0, split_sizes = var_2410, x = var_163_cast_fp16)[name = string("op_2411")]; + tensor cos_117_axes_0 = const()[name = string("cos_117_axes_0"), val = tensor([2])]; + tensor cos_117 = expand_dims(axes = cos_117_axes_0, x = var_2407_0)[name = string("cos_117")]; + tensor sin_117_axes_0 = const()[name = string("sin_117_axes_0"), val = tensor([2])]; + tensor sin_117 = expand_dims(axes = sin_117_axes_0, x = var_2411_0)[name = string("sin_117")]; + tensor var_2416_cast_fp16 = mul(x = var_2403_cast_fp16_0, y = cos_117)[name = string("op_2416_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_2403_cast_fp16_0)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name 
= string("x2_57_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_2403_cast_fp16_0)[name = string("x2_57_cast_fp16")]; + fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2427_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_2427_cast_fp16")]; + bool var_2429_interleave_0 = const()[name = string("op_2429_interleave_0"), val = bool(false)]; + tensor var_2429_cast_fp16 = concat(axis = var_38, interleave = var_2429_interleave_0, values = (var_2427_cast_fp16, x1_57_cast_fp16))[name = string("op_2429_cast_fp16")]; + tensor var_2430_cast_fp16 = mul(x = var_2429_cast_fp16, y = sin_117)[name = string("op_2430_cast_fp16")]; + tensor var_2431_cast_fp16 = add(x = var_2416_cast_fp16, y = var_2430_cast_fp16)[name = string("op_2431_cast_fp16")]; + tensor cos_121_axes_0 = const()[name = string("cos_121_axes_0"), val = tensor([2])]; + tensor cos_121 = expand_dims(axes = cos_121_axes_0, x = var_2407_1)[name = string("cos_121")]; + tensor sin_121_axes_0 = const()[name = string("sin_121_axes_0"), val = tensor([2])]; + tensor sin_121 = expand_dims(axes = sin_121_axes_0, x = var_2411_1)[name = string("sin_121")]; + tensor var_2434_cast_fp16 = mul(x = var_2403_cast_fp16_1, y = cos_121)[name = string("op_2434_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin 
= x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_2403_cast_fp16_1)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_2403_cast_fp16_1)[name = string("x2_59_cast_fp16")]; + fp16 const_185_promoted_to_fp16 = const()[name = string("const_185_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2445_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_185_promoted_to_fp16)[name = string("op_2445_cast_fp16")]; + bool var_2447_interleave_0 = const()[name = string("op_2447_interleave_0"), val = bool(false)]; + tensor var_2447_cast_fp16 = concat(axis = var_38, interleave = var_2447_interleave_0, values = (var_2445_cast_fp16, x1_59_cast_fp16))[name = string("op_2447_cast_fp16")]; + tensor var_2448_cast_fp16 = mul(x = var_2447_cast_fp16, y = sin_121)[name = string("op_2448_cast_fp16")]; + tensor var_2449_cast_fp16 = add(x = var_2434_cast_fp16, y = var_2448_cast_fp16)[name = string("op_2449_cast_fp16")]; + bool query_states_15_interleave_0 = const()[name = string("query_states_15_interleave_0"), val = bool(false)]; + tensor query_states_15_cast_fp16 = concat(axis = var_38, interleave = query_states_15_interleave_0, values = (var_2431_cast_fp16, var_2449_cast_fp16))[name = string("query_states_15_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169536896)))]; + tensor linear_51_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_7_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_98_cast_fp16)[name = string("linear_51_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e4p+3)]; + fp16 model_vision_tower_encoder_layers_7_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.ep+3)]; + tensor clip_101_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_51_cast_fp16)[name = string("clip_101_cast_fp16")]; + tensor var_2462 = const()[name = string("op_2462"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_423_cast_fp16 = reshape(shape = var_2462, x = clip_101_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; + fp16 var_33_promoted_51_to_fp16 = const()[name = string("op_33_promoted_51_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2466_cast_fp16 = pow(x = hidden_states_423_cast_fp16, y = var_33_promoted_51_to_fp16)[name = string("op_2466_cast_fp16")]; + tensor var_2468_axes_0 = const()[name = string("op_2468_axes_0"), val = tensor([-1])]; + bool var_2468_keep_dims_0 = const()[name = string("op_2468_keep_dims_0"), val = bool(true)]; + tensor var_2468_cast_fp16 = reduce_mean(axes = var_2468_axes_0, keep_dims = var_2468_keep_dims_0, x = var_2466_cast_fp16)[name = string("op_2468_cast_fp16")]; + fp16 var_2469_to_fp16 = const()[name = string("op_2469_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_103_cast_fp16 = add(x = var_2468_cast_fp16, y = var_2469_to_fp16)[name = string("mean_squared_103_cast_fp16")]; + tensor var_2471_cast_fp16 = pow(x = mean_squared_103_cast_fp16, y 
= var_27_to_fp16)[name = string("op_2471_cast_fp16")]; + tensor normed_output_191_cast_fp16 = mul(x = hidden_states_423_cast_fp16, y = var_2471_cast_fp16)[name = string("normed_output_191_cast_fp16")]; + tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170716608)))]; + tensor normed_output_193_cast_fp16 = mul(x = normed_output_191_cast_fp16, y = const_186_to_fp16)[name = string("normed_output_193_cast_fp16")]; + tensor var_2491 = const()[name = string("op_2491"), val = tensor([32, 32])]; + int32 var_2492_axis_0 = const()[name = string("op_2492_axis_0"), val = int32(-1)]; + tensor var_2492_cast_fp16_0, tensor var_2492_cast_fp16_1 = split(axis = var_2492_axis_0, split_sizes = var_2491, x = normed_output_193_cast_fp16)[name = string("op_2492_cast_fp16")]; + tensor var_2495 = const()[name = string("op_2495"), val = tensor([32, 32])]; + int32 var_2496_axis_0 = const()[name = string("op_2496_axis_0"), val = int32(-1)]; + tensor var_2496_0, tensor var_2496_1 = split(axis = var_2496_axis_0, split_sizes = var_2495, x = var_160_cast_fp16)[name = string("op_2496")]; + tensor var_2499 = const()[name = string("op_2499"), val = tensor([32, 32])]; + int32 var_2500_axis_0 = const()[name = string("op_2500_axis_0"), val = int32(-1)]; + tensor var_2500_0, tensor var_2500_1 = split(axis = var_2500_axis_0, split_sizes = var_2499, x = var_163_cast_fp16)[name = string("op_2500")]; + tensor cos_125_axes_0 = const()[name = string("cos_125_axes_0"), val = tensor([2])]; + tensor cos_125 = expand_dims(axes = cos_125_axes_0, x = var_2496_0)[name = string("cos_125")]; + tensor sin_125_axes_0 = const()[name = string("sin_125_axes_0"), val = tensor([2])]; + tensor sin_125 = expand_dims(axes = sin_125_axes_0, x = var_2500_0)[name = string("sin_125")]; + tensor var_2505_cast_fp16 = mul(x = var_2492_cast_fp16_0, y = cos_125)[name = string("op_2505_cast_fp16")]; + tensor x1_61_begin_0 = 
const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_2492_cast_fp16_0)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_2492_cast_fp16_0)[name = string("x2_61_cast_fp16")]; + fp16 const_191_promoted_to_fp16 = const()[name = string("const_191_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2516_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_191_promoted_to_fp16)[name = string("op_2516_cast_fp16")]; + bool var_2518_interleave_0 = const()[name = string("op_2518_interleave_0"), val = bool(false)]; + tensor var_2518_cast_fp16 = concat(axis = var_38, interleave = var_2518_interleave_0, values = (var_2516_cast_fp16, x1_61_cast_fp16))[name = string("op_2518_cast_fp16")]; + tensor var_2519_cast_fp16 = mul(x = var_2518_cast_fp16, y = sin_125)[name = string("op_2519_cast_fp16")]; + tensor var_2520_cast_fp16 = add(x = var_2505_cast_fp16, y = var_2519_cast_fp16)[name = string("op_2520_cast_fp16")]; + tensor cos_129_axes_0 = const()[name = string("cos_129_axes_0"), val = tensor([2])]; + tensor cos_129 = expand_dims(axes = cos_129_axes_0, x = var_2496_1)[name = string("cos_129")]; + tensor sin_129_axes_0 = const()[name = string("sin_129_axes_0"), val = tensor([2])]; + tensor sin_129 = expand_dims(axes = sin_129_axes_0, x = 
var_2500_1)[name = string("sin_129")]; + tensor var_2523_cast_fp16 = mul(x = var_2492_cast_fp16_1, y = cos_129)[name = string("op_2523_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_2492_cast_fp16_1)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_2492_cast_fp16_1)[name = string("x2_63_cast_fp16")]; + fp16 const_194_promoted_to_fp16 = const()[name = string("const_194_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2534_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_194_promoted_to_fp16)[name = string("op_2534_cast_fp16")]; + bool var_2536_interleave_0 = const()[name = string("op_2536_interleave_0"), val = bool(false)]; + tensor var_2536_cast_fp16 = concat(axis = var_38, interleave = var_2536_interleave_0, values = (var_2534_cast_fp16, x1_63_cast_fp16))[name = string("op_2536_cast_fp16")]; + tensor var_2537_cast_fp16 = mul(x = var_2536_cast_fp16, y = sin_129)[name = string("op_2537_cast_fp16")]; + tensor var_2538_cast_fp16 = add(x = var_2523_cast_fp16, y = var_2537_cast_fp16)[name = string("op_2538_cast_fp16")]; + bool key_states_15_interleave_0 = const()[name = string("key_states_15_interleave_0"), val = bool(false)]; + tensor key_states_15_cast_fp16 = concat(axis = var_38, 
interleave = key_states_15_interleave_0, values = (var_2520_cast_fp16, var_2538_cast_fp16))[name = string("key_states_15_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170716800)))]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_7_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_98_cast_fp16)[name = string("linear_52_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e4p+3)]; + fp16 model_vision_tower_encoder_layers_7_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.ep+3)]; + tensor clip_103_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_52_cast_fp16)[name = string("clip_103_cast_fp16")]; + tensor var_2551 = const()[name = string("op_2551"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_429_cast_fp16 = reshape(shape = var_2551, x = clip_103_cast_fp16)[name = string("hidden_states_429_cast_fp16")]; + fp16 var_33_promoted_52_to_fp16 = const()[name = string("op_33_promoted_52_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2554_cast_fp16 = pow(x = hidden_states_429_cast_fp16, y = var_33_promoted_52_to_fp16)[name = string("op_2554_cast_fp16")]; + tensor var_2556_axes_0 = const()[name = string("op_2556_axes_0"), val = tensor([-1])]; + bool var_2556_keep_dims_0 = const()[name = 
string("op_2556_keep_dims_0"), val = bool(true)]; + tensor var_2556_cast_fp16 = reduce_mean(axes = var_2556_axes_0, keep_dims = var_2556_keep_dims_0, x = var_2554_cast_fp16)[name = string("op_2556_cast_fp16")]; + fp16 var_2557_to_fp16 = const()[name = string("op_2557_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_105_cast_fp16 = add(x = var_2556_cast_fp16, y = var_2557_to_fp16)[name = string("mean_squared_105_cast_fp16")]; + tensor var_2559_cast_fp16 = pow(x = mean_squared_105_cast_fp16, y = var_27_to_fp16)[name = string("op_2559_cast_fp16")]; + tensor normed_output_195_cast_fp16 = mul(x = hidden_states_429_cast_fp16, y = var_2559_cast_fp16)[name = string("normed_output_195_cast_fp16")]; + tensor hidden_states_435_perm_0 = const()[name = string("hidden_states_435_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(true)]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = key_states_15_cast_fp16)[name = string("transpose_129")]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = query_states_15_cast_fp16)[name = string("transpose_130")]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("matmul_7_cast_fp16")]; + tensor add_7_cast_fp16 = add(x = matmul_7_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_7_cast_fp16")]; + int32 softmax_7_axis_0 = const()[name = string("softmax_7_axis_0"), val = int32(-1)]; + tensor softmax_7_cast_fp16 = softmax(axis = softmax_7_axis_0, x = add_7_cast_fp16)[name = 
string("softmax_7_cast_fp16")]; + bool attn_output_29_transpose_x_0 = const()[name = string("attn_output_29_transpose_x_0"), val = bool(false)]; + bool attn_output_29_transpose_y_0 = const()[name = string("attn_output_29_transpose_y_0"), val = bool(false)]; + tensor hidden_states_435_cast_fp16 = transpose(perm = hidden_states_435_perm_0, x = normed_output_195_cast_fp16)[name = string("transpose_131")]; + tensor attn_output_29_cast_fp16 = matmul(transpose_x = attn_output_29_transpose_x_0, transpose_y = attn_output_29_transpose_y_0, x = softmax_7_cast_fp16, y = hidden_states_435_cast_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor var_2564_perm_0 = const()[name = string("op_2564_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2566 = const()[name = string("op_2566"), val = tensor([1, 2304, -1])]; + tensor var_2564_cast_fp16 = transpose(perm = var_2564_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_128")]; + tensor var_2567_cast_fp16 = reshape(shape = var_2566, x = var_2564_cast_fp16)[name = string("op_2567_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.46p+1)]; + fp16 model_vision_tower_encoder_layers_7_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.42p+1)]; + tensor clip_104_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_self_attn_o_proj_input_max_promoted_to_fp16, x = var_2567_cast_fp16)[name = string("clip_104_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171896512)))]; + tensor linear_53_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_7_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_104_cast_fp16)[name = string("linear_53_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e4p+1)]; + fp16 model_vision_tower_encoder_layers_7_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.ep+1)]; + tensor clip_105_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_53_cast_fp16)[name = string("clip_105_cast_fp16")]; + fp16 var_33_promoted_53_to_fp16 = const()[name = string("op_33_promoted_53_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2580_cast_fp16 = pow(x = clip_105_cast_fp16, y = var_33_promoted_53_to_fp16)[name = string("op_2580_cast_fp16")]; + tensor var_2582_axes_0 = const()[name = string("op_2582_axes_0"), val = tensor([-1])]; + bool var_2582_keep_dims_0 = const()[name = string("op_2582_keep_dims_0"), val = bool(true)]; + tensor var_2582_cast_fp16 = reduce_mean(axes = var_2582_axes_0, keep_dims = var_2582_keep_dims_0, x = var_2580_cast_fp16)[name = string("op_2582_cast_fp16")]; + fp16 var_2583_to_fp16 = const()[name = string("op_2583_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_107_cast_fp16 = add(x = var_2582_cast_fp16, y = var_2583_to_fp16)[name = string("mean_squared_107_cast_fp16")]; + tensor var_2585_cast_fp16 = pow(x = mean_squared_107_cast_fp16, y = var_27_to_fp16)[name = string("op_2585_cast_fp16")]; + tensor 
normed_output_197_cast_fp16 = mul(x = clip_105_cast_fp16, y = var_2585_cast_fp16)[name = string("normed_output_197_cast_fp16")]; + tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173076224)))]; + tensor normed_output_199_cast_fp16 = mul(x = normed_output_197_cast_fp16, y = const_195_to_fp16)[name = string("normed_output_199_cast_fp16")]; + tensor hidden_states_447_cast_fp16 = add(x = hidden_states_409_cast_fp16, y = normed_output_199_cast_fp16)[name = string("hidden_states_447_cast_fp16")]; + fp16 var_33_promoted_54_to_fp16 = const()[name = string("op_33_promoted_54_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2593_cast_fp16 = pow(x = hidden_states_447_cast_fp16, y = var_33_promoted_54_to_fp16)[name = string("op_2593_cast_fp16")]; + tensor var_2595_axes_0 = const()[name = string("op_2595_axes_0"), val = tensor([-1])]; + bool var_2595_keep_dims_0 = const()[name = string("op_2595_keep_dims_0"), val = bool(true)]; + tensor var_2595_cast_fp16 = reduce_mean(axes = var_2595_axes_0, keep_dims = var_2595_keep_dims_0, x = var_2593_cast_fp16)[name = string("op_2595_cast_fp16")]; + fp16 var_2596_to_fp16 = const()[name = string("op_2596_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_109_cast_fp16 = add(x = var_2595_cast_fp16, y = var_2596_to_fp16)[name = string("mean_squared_109_cast_fp16")]; + tensor var_2598_cast_fp16 = pow(x = mean_squared_109_cast_fp16, y = var_27_to_fp16)[name = string("op_2598_cast_fp16")]; + tensor normed_output_201_cast_fp16 = mul(x = hidden_states_447_cast_fp16, y = var_2598_cast_fp16)[name = string("normed_output_201_cast_fp16")]; + tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173077824)))]; + tensor normed_output_203_cast_fp16 = mul(x = normed_output_201_cast_fp16, y = const_196_to_fp16)[name = 
string("normed_output_203_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.1ep+3)]; + fp16 model_vision_tower_encoder_layers_7_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.1cp+3)]; + tensor clip_106_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_203_cast_fp16)[name = string("clip_106_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173079424)))]; + tensor linear_54_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_7_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_106_cast_fp16)[name = string("linear_54_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.48p+3)]; + fp16 model_vision_tower_encoder_layers_7_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.46p+3)]; + tensor clip_107_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_54_cast_fp16)[name = string("clip_107_cast_fp16")]; + string var_2615_mode_0 = 
const()[name = string("op_2615_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2615_cast_fp16 = gelu(mode = var_2615_mode_0, x = clip_107_cast_fp16)[name = string("op_2615_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177798080)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_7_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_106_cast_fp16)[name = string("linear_55_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.48p+3)]; + fp16 model_vision_tower_encoder_layers_7_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.46p+3)]; + tensor clip_109_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_mlp_up_proj_output_max_promoted_to_fp16, x = linear_55_cast_fp16)[name = string("clip_109_cast_fp16")]; + tensor hidden_states_457_cast_fp16 = mul(x = var_2615_cast_fp16, y = clip_109_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.02p+5)]; + fp16 model_vision_tower_encoder_layers_7_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1p+5)]; + tensor clip_110_cast_fp16 = 
clip(alpha = model_vision_tower_encoder_layers_7_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_457_cast_fp16)[name = string("clip_110_cast_fp16")]; + tensor model_vision_tower_encoder_layers_7_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182516736)))]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_7_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_110_cast_fp16)[name = string("linear_56_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_7_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.dap+3)]; + fp16 model_vision_tower_encoder_layers_7_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_7_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.d6p+3)]; + tensor clip_111_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_7_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_7_mlp_down_proj_output_max_promoted_to_fp16, x = linear_56_cast_fp16)[name = string("clip_111_cast_fp16")]; + fp16 var_33_promoted_55_to_fp16 = const()[name = string("op_33_promoted_55_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2637_cast_fp16 = pow(x = clip_111_cast_fp16, y = var_33_promoted_55_to_fp16)[name = string("op_2637_cast_fp16")]; + tensor var_2639_axes_0 = const()[name = string("op_2639_axes_0"), val = tensor([-1])]; + bool var_2639_keep_dims_0 = const()[name = string("op_2639_keep_dims_0"), val = bool(true)]; + tensor var_2639_cast_fp16 = reduce_mean(axes = var_2639_axes_0, keep_dims = var_2639_keep_dims_0, x = 
var_2637_cast_fp16)[name = string("op_2639_cast_fp16")]; + fp16 var_2640_to_fp16 = const()[name = string("op_2640_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_111_cast_fp16 = add(x = var_2639_cast_fp16, y = var_2640_to_fp16)[name = string("mean_squared_111_cast_fp16")]; + tensor var_2642_cast_fp16 = pow(x = mean_squared_111_cast_fp16, y = var_27_to_fp16)[name = string("op_2642_cast_fp16")]; + tensor normed_output_205_cast_fp16 = mul(x = clip_111_cast_fp16, y = var_2642_cast_fp16)[name = string("normed_output_205_cast_fp16")]; + tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187235392)))]; + tensor normed_output_207_cast_fp16 = mul(x = normed_output_205_cast_fp16, y = const_197_to_fp16)[name = string("normed_output_207_cast_fp16")]; + tensor hidden_states_467_cast_fp16 = add(x = hidden_states_447_cast_fp16, y = normed_output_207_cast_fp16)[name = string("hidden_states_467_cast_fp16")]; + fp16 var_33_promoted_56_to_fp16 = const()[name = string("op_33_promoted_56_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2656_cast_fp16 = pow(x = hidden_states_467_cast_fp16, y = var_33_promoted_56_to_fp16)[name = string("op_2656_cast_fp16")]; + tensor var_2658_axes_0 = const()[name = string("op_2658_axes_0"), val = tensor([-1])]; + bool var_2658_keep_dims_0 = const()[name = string("op_2658_keep_dims_0"), val = bool(true)]; + tensor var_2658_cast_fp16 = reduce_mean(axes = var_2658_axes_0, keep_dims = var_2658_keep_dims_0, x = var_2656_cast_fp16)[name = string("op_2658_cast_fp16")]; + fp16 var_2659_to_fp16 = const()[name = string("op_2659_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_113_cast_fp16 = add(x = var_2658_cast_fp16, y = var_2659_to_fp16)[name = string("mean_squared_113_cast_fp16")]; + tensor var_2661_cast_fp16 = pow(x = mean_squared_113_cast_fp16, y = var_27_to_fp16)[name = string("op_2661_cast_fp16")]; + tensor normed_output_209_cast_fp16 = 
mul(x = hidden_states_467_cast_fp16, y = var_2661_cast_fp16)[name = string("normed_output_209_cast_fp16")]; + tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187236992)))]; + tensor normed_output_211_cast_fp16 = mul(x = normed_output_209_cast_fp16, y = const_198_to_fp16)[name = string("normed_output_211_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.52p+3)]; + fp16 model_vision_tower_encoder_layers_8_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.4ep+3)]; + tensor clip_112_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_211_cast_fp16)[name = string("clip_112_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187238592)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_8_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_112_cast_fp16)[name = string("linear_57_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.dcp+3)]; + fp16 model_vision_tower_encoder_layers_8_self_attn_q_proj_output_max_promoted_to_fp16 = 
const()[name = string("model_vision_tower_encoder_layers_8_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.d8p+3)]; + tensor clip_113_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_57_cast_fp16)[name = string("clip_113_cast_fp16")]; + tensor var_2683 = const()[name = string("op_2683"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_475_cast_fp16 = reshape(shape = var_2683, x = clip_113_cast_fp16)[name = string("hidden_states_475_cast_fp16")]; + fp16 var_33_promoted_57_to_fp16 = const()[name = string("op_33_promoted_57_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2687_cast_fp16 = pow(x = hidden_states_475_cast_fp16, y = var_33_promoted_57_to_fp16)[name = string("op_2687_cast_fp16")]; + tensor var_2689_axes_0 = const()[name = string("op_2689_axes_0"), val = tensor([-1])]; + bool var_2689_keep_dims_0 = const()[name = string("op_2689_keep_dims_0"), val = bool(true)]; + tensor var_2689_cast_fp16 = reduce_mean(axes = var_2689_axes_0, keep_dims = var_2689_keep_dims_0, x = var_2687_cast_fp16)[name = string("op_2689_cast_fp16")]; + fp16 var_2690_to_fp16 = const()[name = string("op_2690_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_115_cast_fp16 = add(x = var_2689_cast_fp16, y = var_2690_to_fp16)[name = string("mean_squared_115_cast_fp16")]; + tensor var_2692_cast_fp16 = pow(x = mean_squared_115_cast_fp16, y = var_27_to_fp16)[name = string("op_2692_cast_fp16")]; + tensor normed_output_213_cast_fp16 = mul(x = hidden_states_475_cast_fp16, y = var_2692_cast_fp16)[name = string("normed_output_213_cast_fp16")]; + tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188418304)))]; + tensor normed_output_215_cast_fp16 = mul(x = normed_output_213_cast_fp16, y = const_201_to_fp16)[name = 
string("normed_output_215_cast_fp16")]; + tensor var_2712 = const()[name = string("op_2712"), val = tensor([32, 32])]; + int32 var_2713_axis_0 = const()[name = string("op_2713_axis_0"), val = int32(-1)]; + tensor var_2713_cast_fp16_0, tensor var_2713_cast_fp16_1 = split(axis = var_2713_axis_0, split_sizes = var_2712, x = normed_output_215_cast_fp16)[name = string("op_2713_cast_fp16")]; + tensor var_2716 = const()[name = string("op_2716"), val = tensor([32, 32])]; + int32 var_2717_axis_0 = const()[name = string("op_2717_axis_0"), val = int32(-1)]; + tensor var_2717_0, tensor var_2717_1 = split(axis = var_2717_axis_0, split_sizes = var_2716, x = var_160_cast_fp16)[name = string("op_2717")]; + tensor var_2720 = const()[name = string("op_2720"), val = tensor([32, 32])]; + int32 var_2721_axis_0 = const()[name = string("op_2721_axis_0"), val = int32(-1)]; + tensor var_2721_0, tensor var_2721_1 = split(axis = var_2721_axis_0, split_sizes = var_2720, x = var_163_cast_fp16)[name = string("op_2721")]; + tensor cos_133_axes_0 = const()[name = string("cos_133_axes_0"), val = tensor([2])]; + tensor cos_133 = expand_dims(axes = cos_133_axes_0, x = var_2717_0)[name = string("cos_133")]; + tensor sin_133_axes_0 = const()[name = string("sin_133_axes_0"), val = tensor([2])]; + tensor sin_133 = expand_dims(axes = sin_133_axes_0, x = var_2721_0)[name = string("sin_133")]; + tensor var_2726_cast_fp16 = mul(x = var_2713_cast_fp16_0, y = cos_133)[name = string("op_2726_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_2713_cast_fp16_0)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name 
= string("x2_65_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_2713_cast_fp16_0)[name = string("x2_65_cast_fp16")]; + fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2737_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_2737_cast_fp16")]; + bool var_2739_interleave_0 = const()[name = string("op_2739_interleave_0"), val = bool(false)]; + tensor var_2739_cast_fp16 = concat(axis = var_38, interleave = var_2739_interleave_0, values = (var_2737_cast_fp16, x1_65_cast_fp16))[name = string("op_2739_cast_fp16")]; + tensor var_2740_cast_fp16 = mul(x = var_2739_cast_fp16, y = sin_133)[name = string("op_2740_cast_fp16")]; + tensor var_2741_cast_fp16 = add(x = var_2726_cast_fp16, y = var_2740_cast_fp16)[name = string("op_2741_cast_fp16")]; + tensor cos_137_axes_0 = const()[name = string("cos_137_axes_0"), val = tensor([2])]; + tensor cos_137 = expand_dims(axes = cos_137_axes_0, x = var_2717_1)[name = string("cos_137")]; + tensor sin_137_axes_0 = const()[name = string("sin_137_axes_0"), val = tensor([2])]; + tensor sin_137 = expand_dims(axes = sin_137_axes_0, x = var_2721_1)[name = string("sin_137")]; + tensor var_2744_cast_fp16 = mul(x = var_2713_cast_fp16_1, y = cos_137)[name = string("op_2744_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin 
= x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_2713_cast_fp16_1)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_2713_cast_fp16_1)[name = string("x2_67_cast_fp16")]; + fp16 const_209_promoted_to_fp16 = const()[name = string("const_209_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2755_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_209_promoted_to_fp16)[name = string("op_2755_cast_fp16")]; + bool var_2757_interleave_0 = const()[name = string("op_2757_interleave_0"), val = bool(false)]; + tensor var_2757_cast_fp16 = concat(axis = var_38, interleave = var_2757_interleave_0, values = (var_2755_cast_fp16, x1_67_cast_fp16))[name = string("op_2757_cast_fp16")]; + tensor var_2758_cast_fp16 = mul(x = var_2757_cast_fp16, y = sin_137)[name = string("op_2758_cast_fp16")]; + tensor var_2759_cast_fp16 = add(x = var_2744_cast_fp16, y = var_2758_cast_fp16)[name = string("op_2759_cast_fp16")]; + bool query_states_17_interleave_0 = const()[name = string("query_states_17_interleave_0"), val = bool(false)]; + tensor query_states_17_cast_fp16 = concat(axis = var_38, interleave = query_states_17_interleave_0, values = (var_2741_cast_fp16, var_2759_cast_fp16))[name = string("query_states_17_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188418496)))]; + tensor linear_58_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_8_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_112_cast_fp16)[name = string("linear_58_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.16p+4)]; + fp16 model_vision_tower_encoder_layers_8_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.14p+4)]; + tensor clip_115_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_58_cast_fp16)[name = string("clip_115_cast_fp16")]; + tensor var_2772 = const()[name = string("op_2772"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_481_cast_fp16 = reshape(shape = var_2772, x = clip_115_cast_fp16)[name = string("hidden_states_481_cast_fp16")]; + fp16 var_33_promoted_58_to_fp16 = const()[name = string("op_33_promoted_58_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2776_cast_fp16 = pow(x = hidden_states_481_cast_fp16, y = var_33_promoted_58_to_fp16)[name = string("op_2776_cast_fp16")]; + tensor var_2778_axes_0 = const()[name = string("op_2778_axes_0"), val = tensor([-1])]; + bool var_2778_keep_dims_0 = const()[name = string("op_2778_keep_dims_0"), val = bool(true)]; + tensor var_2778_cast_fp16 = reduce_mean(axes = var_2778_axes_0, keep_dims = var_2778_keep_dims_0, x = var_2776_cast_fp16)[name = string("op_2778_cast_fp16")]; + fp16 var_2779_to_fp16 = const()[name = string("op_2779_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_117_cast_fp16 = add(x = var_2778_cast_fp16, y = var_2779_to_fp16)[name = string("mean_squared_117_cast_fp16")]; + tensor var_2781_cast_fp16 = pow(x = mean_squared_117_cast_fp16, 
y = var_27_to_fp16)[name = string("op_2781_cast_fp16")]; + tensor normed_output_217_cast_fp16 = mul(x = hidden_states_481_cast_fp16, y = var_2781_cast_fp16)[name = string("normed_output_217_cast_fp16")]; + tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189598208)))]; + tensor normed_output_219_cast_fp16 = mul(x = normed_output_217_cast_fp16, y = const_210_to_fp16)[name = string("normed_output_219_cast_fp16")]; + tensor var_2801 = const()[name = string("op_2801"), val = tensor([32, 32])]; + int32 var_2802_axis_0 = const()[name = string("op_2802_axis_0"), val = int32(-1)]; + tensor var_2802_cast_fp16_0, tensor var_2802_cast_fp16_1 = split(axis = var_2802_axis_0, split_sizes = var_2801, x = normed_output_219_cast_fp16)[name = string("op_2802_cast_fp16")]; + tensor var_2805 = const()[name = string("op_2805"), val = tensor([32, 32])]; + int32 var_2806_axis_0 = const()[name = string("op_2806_axis_0"), val = int32(-1)]; + tensor var_2806_0, tensor var_2806_1 = split(axis = var_2806_axis_0, split_sizes = var_2805, x = var_160_cast_fp16)[name = string("op_2806")]; + tensor var_2809 = const()[name = string("op_2809"), val = tensor([32, 32])]; + int32 var_2810_axis_0 = const()[name = string("op_2810_axis_0"), val = int32(-1)]; + tensor var_2810_0, tensor var_2810_1 = split(axis = var_2810_axis_0, split_sizes = var_2809, x = var_163_cast_fp16)[name = string("op_2810")]; + tensor cos_141_axes_0 = const()[name = string("cos_141_axes_0"), val = tensor([2])]; + tensor cos_141 = expand_dims(axes = cos_141_axes_0, x = var_2806_0)[name = string("cos_141")]; + tensor sin_141_axes_0 = const()[name = string("sin_141_axes_0"), val = tensor([2])]; + tensor sin_141 = expand_dims(axes = sin_141_axes_0, x = var_2810_0)[name = string("sin_141")]; + tensor var_2815_cast_fp16 = mul(x = var_2802_cast_fp16_0, y = cos_141)[name = string("op_2815_cast_fp16")]; + tensor x1_69_begin_0 = 
const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_2802_cast_fp16_0)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_2802_cast_fp16_0)[name = string("x2_69_cast_fp16")]; + fp16 const_215_promoted_to_fp16 = const()[name = string("const_215_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2826_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_215_promoted_to_fp16)[name = string("op_2826_cast_fp16")]; + bool var_2828_interleave_0 = const()[name = string("op_2828_interleave_0"), val = bool(false)]; + tensor var_2828_cast_fp16 = concat(axis = var_38, interleave = var_2828_interleave_0, values = (var_2826_cast_fp16, x1_69_cast_fp16))[name = string("op_2828_cast_fp16")]; + tensor var_2829_cast_fp16 = mul(x = var_2828_cast_fp16, y = sin_141)[name = string("op_2829_cast_fp16")]; + tensor var_2830_cast_fp16 = add(x = var_2815_cast_fp16, y = var_2829_cast_fp16)[name = string("op_2830_cast_fp16")]; + tensor cos_145_axes_0 = const()[name = string("cos_145_axes_0"), val = tensor([2])]; + tensor cos_145 = expand_dims(axes = cos_145_axes_0, x = var_2806_1)[name = string("cos_145")]; + tensor sin_145_axes_0 = const()[name = string("sin_145_axes_0"), val = tensor([2])]; + tensor sin_145 = expand_dims(axes = sin_145_axes_0, x = 
var_2810_1)[name = string("sin_145")]; + tensor var_2833_cast_fp16 = mul(x = var_2802_cast_fp16_1, y = cos_145)[name = string("op_2833_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = var_2802_cast_fp16_1)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = var_2802_cast_fp16_1)[name = string("x2_71_cast_fp16")]; + fp16 const_218_promoted_to_fp16 = const()[name = string("const_218_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2844_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_218_promoted_to_fp16)[name = string("op_2844_cast_fp16")]; + bool var_2846_interleave_0 = const()[name = string("op_2846_interleave_0"), val = bool(false)]; + tensor var_2846_cast_fp16 = concat(axis = var_38, interleave = var_2846_interleave_0, values = (var_2844_cast_fp16, x1_71_cast_fp16))[name = string("op_2846_cast_fp16")]; + tensor var_2847_cast_fp16 = mul(x = var_2846_cast_fp16, y = sin_145)[name = string("op_2847_cast_fp16")]; + tensor var_2848_cast_fp16 = add(x = var_2833_cast_fp16, y = var_2847_cast_fp16)[name = string("op_2848_cast_fp16")]; + bool key_states_17_interleave_0 = const()[name = string("key_states_17_interleave_0"), val = bool(false)]; + tensor key_states_17_cast_fp16 = concat(axis = var_38, 
interleave = key_states_17_interleave_0, values = (var_2830_cast_fp16, var_2848_cast_fp16))[name = string("key_states_17_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189598400)))]; + tensor linear_59_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_8_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_112_cast_fp16)[name = string("linear_59_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.16p+4)]; + fp16 model_vision_tower_encoder_layers_8_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.14p+4)]; + tensor clip_117_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_59_cast_fp16)[name = string("clip_117_cast_fp16")]; + tensor var_2861 = const()[name = string("op_2861"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_487_cast_fp16 = reshape(shape = var_2861, x = clip_117_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; + fp16 var_33_promoted_59_to_fp16 = const()[name = string("op_33_promoted_59_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2864_cast_fp16 = pow(x = hidden_states_487_cast_fp16, y = var_33_promoted_59_to_fp16)[name = string("op_2864_cast_fp16")]; + tensor var_2866_axes_0 = const()[name = string("op_2866_axes_0"), val = tensor([-1])]; + bool var_2866_keep_dims_0 = const()[name = 
string("op_2866_keep_dims_0"), val = bool(true)]; + tensor var_2866_cast_fp16 = reduce_mean(axes = var_2866_axes_0, keep_dims = var_2866_keep_dims_0, x = var_2864_cast_fp16)[name = string("op_2866_cast_fp16")]; + fp16 var_2867_to_fp16 = const()[name = string("op_2867_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_119_cast_fp16 = add(x = var_2866_cast_fp16, y = var_2867_to_fp16)[name = string("mean_squared_119_cast_fp16")]; + tensor var_2869_cast_fp16 = pow(x = mean_squared_119_cast_fp16, y = var_27_to_fp16)[name = string("op_2869_cast_fp16")]; + tensor normed_output_221_cast_fp16 = mul(x = hidden_states_487_cast_fp16, y = var_2869_cast_fp16)[name = string("normed_output_221_cast_fp16")]; + tensor hidden_states_493_perm_0 = const()[name = string("hidden_states_493_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(true)]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + tensor transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = key_states_17_cast_fp16)[name = string("transpose_125")]; + tensor transpose_80 = transpose(perm = transpose_80_perm_0, x = query_states_17_cast_fp16)[name = string("transpose_126")]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("matmul_8_cast_fp16")]; + tensor add_8_cast_fp16 = add(x = matmul_8_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_8_cast_fp16")]; + int32 softmax_8_axis_0 = const()[name = string("softmax_8_axis_0"), val = int32(-1)]; + tensor softmax_8_cast_fp16 = softmax(axis = softmax_8_axis_0, x = add_8_cast_fp16)[name = 
string("softmax_8_cast_fp16")]; + bool attn_output_33_transpose_x_0 = const()[name = string("attn_output_33_transpose_x_0"), val = bool(false)]; + bool attn_output_33_transpose_y_0 = const()[name = string("attn_output_33_transpose_y_0"), val = bool(false)]; + tensor hidden_states_493_cast_fp16 = transpose(perm = hidden_states_493_perm_0, x = normed_output_221_cast_fp16)[name = string("transpose_127")]; + tensor attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = softmax_8_cast_fp16, y = hidden_states_493_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_2874_perm_0 = const()[name = string("op_2874_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2876 = const()[name = string("op_2876"), val = tensor([1, 2304, -1])]; + tensor var_2874_cast_fp16 = transpose(perm = var_2874_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_124")]; + tensor var_2877_cast_fp16 = reshape(shape = var_2876, x = var_2874_cast_fp16)[name = string("op_2877_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.2cp+1)]; + fp16 model_vision_tower_encoder_layers_8_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.28p+1)]; + tensor clip_118_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_self_attn_o_proj_input_max_promoted_to_fp16, x = var_2877_cast_fp16)[name = string("clip_118_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190778112)))]; + tensor linear_60_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_8_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_118_cast_fp16)[name = string("linear_60_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.c2p+1)]; + fp16 model_vision_tower_encoder_layers_8_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.bep+1)]; + tensor clip_119_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_60_cast_fp16)[name = string("clip_119_cast_fp16")]; + fp16 var_33_promoted_60_to_fp16 = const()[name = string("op_33_promoted_60_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2890_cast_fp16 = pow(x = clip_119_cast_fp16, y = var_33_promoted_60_to_fp16)[name = string("op_2890_cast_fp16")]; + tensor var_2892_axes_0 = const()[name = string("op_2892_axes_0"), val = tensor([-1])]; + bool var_2892_keep_dims_0 = const()[name = string("op_2892_keep_dims_0"), val = bool(true)]; + tensor var_2892_cast_fp16 = reduce_mean(axes = var_2892_axes_0, keep_dims = var_2892_keep_dims_0, x = var_2890_cast_fp16)[name = string("op_2892_cast_fp16")]; + fp16 var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_121_cast_fp16 = add(x = var_2892_cast_fp16, y = var_2893_to_fp16)[name = string("mean_squared_121_cast_fp16")]; + tensor var_2895_cast_fp16 = pow(x = mean_squared_121_cast_fp16, y = var_27_to_fp16)[name = string("op_2895_cast_fp16")]; + tensor 
normed_output_223_cast_fp16 = mul(x = clip_119_cast_fp16, y = var_2895_cast_fp16)[name = string("normed_output_223_cast_fp16")]; + tensor const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191957824)))]; + tensor normed_output_225_cast_fp16 = mul(x = normed_output_223_cast_fp16, y = const_219_to_fp16)[name = string("normed_output_225_cast_fp16")]; + tensor hidden_states_505_cast_fp16 = add(x = hidden_states_467_cast_fp16, y = normed_output_225_cast_fp16)[name = string("hidden_states_505_cast_fp16")]; + fp16 var_33_promoted_61_to_fp16 = const()[name = string("op_33_promoted_61_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2903_cast_fp16 = pow(x = hidden_states_505_cast_fp16, y = var_33_promoted_61_to_fp16)[name = string("op_2903_cast_fp16")]; + tensor var_2905_axes_0 = const()[name = string("op_2905_axes_0"), val = tensor([-1])]; + bool var_2905_keep_dims_0 = const()[name = string("op_2905_keep_dims_0"), val = bool(true)]; + tensor var_2905_cast_fp16 = reduce_mean(axes = var_2905_axes_0, keep_dims = var_2905_keep_dims_0, x = var_2903_cast_fp16)[name = string("op_2905_cast_fp16")]; + fp16 var_2906_to_fp16 = const()[name = string("op_2906_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_123_cast_fp16 = add(x = var_2905_cast_fp16, y = var_2906_to_fp16)[name = string("mean_squared_123_cast_fp16")]; + tensor var_2908_cast_fp16 = pow(x = mean_squared_123_cast_fp16, y = var_27_to_fp16)[name = string("op_2908_cast_fp16")]; + tensor normed_output_227_cast_fp16 = mul(x = hidden_states_505_cast_fp16, y = var_2908_cast_fp16)[name = string("normed_output_227_cast_fp16")]; + tensor const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191959424)))]; + tensor normed_output_229_cast_fp16 = mul(x = normed_output_227_cast_fp16, y = const_220_to_fp16)[name = 
string("normed_output_229_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.cp+2)]; + fp16 model_vision_tower_encoder_layers_8_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.bcp+2)]; + tensor clip_120_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_229_cast_fp16)[name = string("clip_120_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191961024)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_8_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_120_cast_fp16)[name = string("linear_61_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.d8p+2)]; + fp16 model_vision_tower_encoder_layers_8_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.d4p+2)]; + tensor clip_121_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_61_cast_fp16)[name = string("clip_121_cast_fp16")]; + string var_2925_mode_0 = 
const()[name = string("op_2925_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_2925_cast_fp16 = gelu(mode = var_2925_mode_0, x = clip_121_cast_fp16)[name = string("op_2925_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196679680)))]; + tensor linear_62_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_8_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_120_cast_fp16)[name = string("linear_62_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.d8p+2)]; + fp16 model_vision_tower_encoder_layers_8_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.d4p+2)]; + tensor clip_123_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_mlp_up_proj_output_max_promoted_to_fp16, x = linear_62_cast_fp16)[name = string("clip_123_cast_fp16")]; + tensor hidden_states_515_cast_fp16 = mul(x = var_2925_cast_fp16, y = clip_123_cast_fp16)[name = string("hidden_states_515_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.44p+4)]; + fp16 model_vision_tower_encoder_layers_8_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.42p+4)]; + tensor clip_124_cast_fp16 
= clip(alpha = model_vision_tower_encoder_layers_8_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_515_cast_fp16)[name = string("clip_124_cast_fp16")]; + tensor model_vision_tower_encoder_layers_8_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201398336)))]; + tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_8_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_124_cast_fp16)[name = string("linear_63_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_8_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.8cp+2)]; + fp16 model_vision_tower_encoder_layers_8_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_8_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.8ap+2)]; + tensor clip_125_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_8_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_8_mlp_down_proj_output_max_promoted_to_fp16, x = linear_63_cast_fp16)[name = string("clip_125_cast_fp16")]; + fp16 var_33_promoted_62_to_fp16 = const()[name = string("op_33_promoted_62_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2947_cast_fp16 = pow(x = clip_125_cast_fp16, y = var_33_promoted_62_to_fp16)[name = string("op_2947_cast_fp16")]; + tensor var_2949_axes_0 = const()[name = string("op_2949_axes_0"), val = tensor([-1])]; + bool var_2949_keep_dims_0 = const()[name = string("op_2949_keep_dims_0"), val = bool(true)]; + tensor var_2949_cast_fp16 = reduce_mean(axes = var_2949_axes_0, keep_dims = var_2949_keep_dims_0, x = 
var_2947_cast_fp16)[name = string("op_2949_cast_fp16")]; + fp16 var_2950_to_fp16 = const()[name = string("op_2950_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_125_cast_fp16 = add(x = var_2949_cast_fp16, y = var_2950_to_fp16)[name = string("mean_squared_125_cast_fp16")]; + tensor var_2952_cast_fp16 = pow(x = mean_squared_125_cast_fp16, y = var_27_to_fp16)[name = string("op_2952_cast_fp16")]; + tensor normed_output_231_cast_fp16 = mul(x = clip_125_cast_fp16, y = var_2952_cast_fp16)[name = string("normed_output_231_cast_fp16")]; + tensor const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206116992)))]; + tensor normed_output_233_cast_fp16 = mul(x = normed_output_231_cast_fp16, y = const_221_to_fp16)[name = string("normed_output_233_cast_fp16")]; + tensor hidden_states_525_cast_fp16 = add(x = hidden_states_505_cast_fp16, y = normed_output_233_cast_fp16)[name = string("hidden_states_525_cast_fp16")]; + fp16 var_33_promoted_63_to_fp16 = const()[name = string("op_33_promoted_63_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2966_cast_fp16 = pow(x = hidden_states_525_cast_fp16, y = var_33_promoted_63_to_fp16)[name = string("op_2966_cast_fp16")]; + tensor var_2968_axes_0 = const()[name = string("op_2968_axes_0"), val = tensor([-1])]; + bool var_2968_keep_dims_0 = const()[name = string("op_2968_keep_dims_0"), val = bool(true)]; + tensor var_2968_cast_fp16 = reduce_mean(axes = var_2968_axes_0, keep_dims = var_2968_keep_dims_0, x = var_2966_cast_fp16)[name = string("op_2968_cast_fp16")]; + fp16 var_2969_to_fp16 = const()[name = string("op_2969_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_127_cast_fp16 = add(x = var_2968_cast_fp16, y = var_2969_to_fp16)[name = string("mean_squared_127_cast_fp16")]; + tensor var_2971_cast_fp16 = pow(x = mean_squared_127_cast_fp16, y = var_27_to_fp16)[name = string("op_2971_cast_fp16")]; + tensor normed_output_235_cast_fp16 = 
mul(x = hidden_states_525_cast_fp16, y = var_2971_cast_fp16)[name = string("normed_output_235_cast_fp16")]; + tensor const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206118592)))]; + tensor normed_output_237_cast_fp16 = mul(x = normed_output_235_cast_fp16, y = const_222_to_fp16)[name = string("normed_output_237_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.7p+3)]; + fp16 model_vision_tower_encoder_layers_9_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.6cp+3)]; + tensor clip_126_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_237_cast_fp16)[name = string("clip_126_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206120192)))]; + tensor linear_64_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_9_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_126_cast_fp16)[name = string("linear_64_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.dp+3)]; + fp16 model_vision_tower_encoder_layers_9_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name 
= string("model_vision_tower_encoder_layers_9_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.ccp+3)]; + tensor clip_127_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_64_cast_fp16)[name = string("clip_127_cast_fp16")]; + tensor var_2993 = const()[name = string("op_2993"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_533_cast_fp16 = reshape(shape = var_2993, x = clip_127_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; + fp16 var_33_promoted_64_to_fp16 = const()[name = string("op_33_promoted_64_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2997_cast_fp16 = pow(x = hidden_states_533_cast_fp16, y = var_33_promoted_64_to_fp16)[name = string("op_2997_cast_fp16")]; + tensor var_2999_axes_0 = const()[name = string("op_2999_axes_0"), val = tensor([-1])]; + bool var_2999_keep_dims_0 = const()[name = string("op_2999_keep_dims_0"), val = bool(true)]; + tensor var_2999_cast_fp16 = reduce_mean(axes = var_2999_axes_0, keep_dims = var_2999_keep_dims_0, x = var_2997_cast_fp16)[name = string("op_2999_cast_fp16")]; + fp16 var_3000_to_fp16 = const()[name = string("op_3000_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_129_cast_fp16 = add(x = var_2999_cast_fp16, y = var_3000_to_fp16)[name = string("mean_squared_129_cast_fp16")]; + tensor var_3002_cast_fp16 = pow(x = mean_squared_129_cast_fp16, y = var_27_to_fp16)[name = string("op_3002_cast_fp16")]; + tensor normed_output_239_cast_fp16 = mul(x = hidden_states_533_cast_fp16, y = var_3002_cast_fp16)[name = string("normed_output_239_cast_fp16")]; + tensor const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207299904)))]; + tensor normed_output_241_cast_fp16 = mul(x = normed_output_239_cast_fp16, y = const_225_to_fp16)[name = 
string("normed_output_241_cast_fp16")]; + tensor var_3022 = const()[name = string("op_3022"), val = tensor([32, 32])]; + int32 var_3023_axis_0 = const()[name = string("op_3023_axis_0"), val = int32(-1)]; + tensor var_3023_cast_fp16_0, tensor var_3023_cast_fp16_1 = split(axis = var_3023_axis_0, split_sizes = var_3022, x = normed_output_241_cast_fp16)[name = string("op_3023_cast_fp16")]; + tensor var_3026 = const()[name = string("op_3026"), val = tensor([32, 32])]; + int32 var_3027_axis_0 = const()[name = string("op_3027_axis_0"), val = int32(-1)]; + tensor var_3027_0, tensor var_3027_1 = split(axis = var_3027_axis_0, split_sizes = var_3026, x = var_160_cast_fp16)[name = string("op_3027")]; + tensor var_3030 = const()[name = string("op_3030"), val = tensor([32, 32])]; + int32 var_3031_axis_0 = const()[name = string("op_3031_axis_0"), val = int32(-1)]; + tensor var_3031_0, tensor var_3031_1 = split(axis = var_3031_axis_0, split_sizes = var_3030, x = var_163_cast_fp16)[name = string("op_3031")]; + tensor cos_149_axes_0 = const()[name = string("cos_149_axes_0"), val = tensor([2])]; + tensor cos_149 = expand_dims(axes = cos_149_axes_0, x = var_3027_0)[name = string("cos_149")]; + tensor sin_149_axes_0 = const()[name = string("sin_149_axes_0"), val = tensor([2])]; + tensor sin_149 = expand_dims(axes = sin_149_axes_0, x = var_3031_0)[name = string("sin_149")]; + tensor var_3036_cast_fp16 = mul(x = var_3023_cast_fp16_0, y = cos_149)[name = string("op_3036_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = var_3023_cast_fp16_0)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name 
= string("x2_73_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = var_3023_cast_fp16_0)[name = string("x2_73_cast_fp16")]; + fp16 const_230_promoted_to_fp16 = const()[name = string("const_230_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3047_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_230_promoted_to_fp16)[name = string("op_3047_cast_fp16")]; + bool var_3049_interleave_0 = const()[name = string("op_3049_interleave_0"), val = bool(false)]; + tensor var_3049_cast_fp16 = concat(axis = var_38, interleave = var_3049_interleave_0, values = (var_3047_cast_fp16, x1_73_cast_fp16))[name = string("op_3049_cast_fp16")]; + tensor var_3050_cast_fp16 = mul(x = var_3049_cast_fp16, y = sin_149)[name = string("op_3050_cast_fp16")]; + tensor var_3051_cast_fp16 = add(x = var_3036_cast_fp16, y = var_3050_cast_fp16)[name = string("op_3051_cast_fp16")]; + tensor cos_153_axes_0 = const()[name = string("cos_153_axes_0"), val = tensor([2])]; + tensor cos_153 = expand_dims(axes = cos_153_axes_0, x = var_3027_1)[name = string("cos_153")]; + tensor sin_153_axes_0 = const()[name = string("sin_153_axes_0"), val = tensor([2])]; + tensor sin_153 = expand_dims(axes = sin_153_axes_0, x = var_3031_1)[name = string("sin_153")]; + tensor var_3054_cast_fp16 = mul(x = var_3023_cast_fp16_1, y = cos_153)[name = string("op_3054_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin 
= x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = var_3023_cast_fp16_1)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = var_3023_cast_fp16_1)[name = string("x2_75_cast_fp16")]; + fp16 const_233_promoted_to_fp16 = const()[name = string("const_233_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3065_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_233_promoted_to_fp16)[name = string("op_3065_cast_fp16")]; + bool var_3067_interleave_0 = const()[name = string("op_3067_interleave_0"), val = bool(false)]; + tensor var_3067_cast_fp16 = concat(axis = var_38, interleave = var_3067_interleave_0, values = (var_3065_cast_fp16, x1_75_cast_fp16))[name = string("op_3067_cast_fp16")]; + tensor var_3068_cast_fp16 = mul(x = var_3067_cast_fp16, y = sin_153)[name = string("op_3068_cast_fp16")]; + tensor var_3069_cast_fp16 = add(x = var_3054_cast_fp16, y = var_3068_cast_fp16)[name = string("op_3069_cast_fp16")]; + bool query_states_19_interleave_0 = const()[name = string("query_states_19_interleave_0"), val = bool(false)]; + tensor query_states_19_cast_fp16 = concat(axis = var_38, interleave = query_states_19_interleave_0, values = (var_3051_cast_fp16, var_3069_cast_fp16))[name = string("query_states_19_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207300096)))]; + tensor linear_65_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_9_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_126_cast_fp16)[name = string("linear_65_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.fep+3)]; + fp16 model_vision_tower_encoder_layers_9_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.fap+3)]; + tensor clip_129_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_65_cast_fp16)[name = string("clip_129_cast_fp16")]; + tensor var_3082 = const()[name = string("op_3082"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_539_cast_fp16 = reshape(shape = var_3082, x = clip_129_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; + fp16 var_33_promoted_65_to_fp16 = const()[name = string("op_33_promoted_65_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3086_cast_fp16 = pow(x = hidden_states_539_cast_fp16, y = var_33_promoted_65_to_fp16)[name = string("op_3086_cast_fp16")]; + tensor var_3088_axes_0 = const()[name = string("op_3088_axes_0"), val = tensor([-1])]; + bool var_3088_keep_dims_0 = const()[name = string("op_3088_keep_dims_0"), val = bool(true)]; + tensor var_3088_cast_fp16 = reduce_mean(axes = var_3088_axes_0, keep_dims = var_3088_keep_dims_0, x = var_3086_cast_fp16)[name = string("op_3088_cast_fp16")]; + fp16 var_3089_to_fp16 = const()[name = string("op_3089_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_131_cast_fp16 = add(x = var_3088_cast_fp16, y = var_3089_to_fp16)[name = string("mean_squared_131_cast_fp16")]; + tensor var_3091_cast_fp16 = pow(x = mean_squared_131_cast_fp16, 
y = var_27_to_fp16)[name = string("op_3091_cast_fp16")]; + tensor normed_output_243_cast_fp16 = mul(x = hidden_states_539_cast_fp16, y = var_3091_cast_fp16)[name = string("normed_output_243_cast_fp16")]; + tensor const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208479808)))]; + tensor normed_output_245_cast_fp16 = mul(x = normed_output_243_cast_fp16, y = const_234_to_fp16)[name = string("normed_output_245_cast_fp16")]; + tensor var_3111 = const()[name = string("op_3111"), val = tensor([32, 32])]; + int32 var_3112_axis_0 = const()[name = string("op_3112_axis_0"), val = int32(-1)]; + tensor var_3112_cast_fp16_0, tensor var_3112_cast_fp16_1 = split(axis = var_3112_axis_0, split_sizes = var_3111, x = normed_output_245_cast_fp16)[name = string("op_3112_cast_fp16")]; + tensor var_3115 = const()[name = string("op_3115"), val = tensor([32, 32])]; + int32 var_3116_axis_0 = const()[name = string("op_3116_axis_0"), val = int32(-1)]; + tensor var_3116_0, tensor var_3116_1 = split(axis = var_3116_axis_0, split_sizes = var_3115, x = var_160_cast_fp16)[name = string("op_3116")]; + tensor var_3119 = const()[name = string("op_3119"), val = tensor([32, 32])]; + int32 var_3120_axis_0 = const()[name = string("op_3120_axis_0"), val = int32(-1)]; + tensor var_3120_0, tensor var_3120_1 = split(axis = var_3120_axis_0, split_sizes = var_3119, x = var_163_cast_fp16)[name = string("op_3120")]; + tensor cos_157_axes_0 = const()[name = string("cos_157_axes_0"), val = tensor([2])]; + tensor cos_157 = expand_dims(axes = cos_157_axes_0, x = var_3116_0)[name = string("cos_157")]; + tensor sin_157_axes_0 = const()[name = string("sin_157_axes_0"), val = tensor([2])]; + tensor sin_157 = expand_dims(axes = sin_157_axes_0, x = var_3120_0)[name = string("sin_157")]; + tensor var_3125_cast_fp16 = mul(x = var_3112_cast_fp16_0, y = cos_157)[name = string("op_3125_cast_fp16")]; + tensor x1_77_begin_0 = 
const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = var_3112_cast_fp16_0)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = var_3112_cast_fp16_0)[name = string("x2_77_cast_fp16")]; + fp16 const_239_promoted_to_fp16 = const()[name = string("const_239_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3136_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_239_promoted_to_fp16)[name = string("op_3136_cast_fp16")]; + bool var_3138_interleave_0 = const()[name = string("op_3138_interleave_0"), val = bool(false)]; + tensor var_3138_cast_fp16 = concat(axis = var_38, interleave = var_3138_interleave_0, values = (var_3136_cast_fp16, x1_77_cast_fp16))[name = string("op_3138_cast_fp16")]; + tensor var_3139_cast_fp16 = mul(x = var_3138_cast_fp16, y = sin_157)[name = string("op_3139_cast_fp16")]; + tensor var_3140_cast_fp16 = add(x = var_3125_cast_fp16, y = var_3139_cast_fp16)[name = string("op_3140_cast_fp16")]; + tensor cos_161_axes_0 = const()[name = string("cos_161_axes_0"), val = tensor([2])]; + tensor cos_161 = expand_dims(axes = cos_161_axes_0, x = var_3116_1)[name = string("cos_161")]; + tensor sin_161_axes_0 = const()[name = string("sin_161_axes_0"), val = tensor([2])]; + tensor sin_161 = expand_dims(axes = sin_161_axes_0, x = 
var_3120_1)[name = string("sin_161")]; + tensor var_3143_cast_fp16 = mul(x = var_3112_cast_fp16_1, y = cos_161)[name = string("op_3143_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = var_3112_cast_fp16_1)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = var_3112_cast_fp16_1)[name = string("x2_79_cast_fp16")]; + fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3154_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_242_promoted_to_fp16)[name = string("op_3154_cast_fp16")]; + bool var_3156_interleave_0 = const()[name = string("op_3156_interleave_0"), val = bool(false)]; + tensor var_3156_cast_fp16 = concat(axis = var_38, interleave = var_3156_interleave_0, values = (var_3154_cast_fp16, x1_79_cast_fp16))[name = string("op_3156_cast_fp16")]; + tensor var_3157_cast_fp16 = mul(x = var_3156_cast_fp16, y = sin_161)[name = string("op_3157_cast_fp16")]; + tensor var_3158_cast_fp16 = add(x = var_3143_cast_fp16, y = var_3157_cast_fp16)[name = string("op_3158_cast_fp16")]; + bool key_states_19_interleave_0 = const()[name = string("key_states_19_interleave_0"), val = bool(false)]; + tensor key_states_19_cast_fp16 = concat(axis = var_38, 
interleave = key_states_19_interleave_0, values = (var_3140_cast_fp16, var_3158_cast_fp16))[name = string("key_states_19_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208480000)))]; + tensor linear_66_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_9_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_126_cast_fp16)[name = string("linear_66_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.fep+3)]; + fp16 model_vision_tower_encoder_layers_9_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.fap+3)]; + tensor clip_131_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_66_cast_fp16)[name = string("clip_131_cast_fp16")]; + tensor var_3171 = const()[name = string("op_3171"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_545_cast_fp16 = reshape(shape = var_3171, x = clip_131_cast_fp16)[name = string("hidden_states_545_cast_fp16")]; + fp16 var_33_promoted_66_to_fp16 = const()[name = string("op_33_promoted_66_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3174_cast_fp16 = pow(x = hidden_states_545_cast_fp16, y = var_33_promoted_66_to_fp16)[name = string("op_3174_cast_fp16")]; + tensor var_3176_axes_0 = const()[name = string("op_3176_axes_0"), val = tensor([-1])]; + bool var_3176_keep_dims_0 = const()[name = 
string("op_3176_keep_dims_0"), val = bool(true)]; + tensor var_3176_cast_fp16 = reduce_mean(axes = var_3176_axes_0, keep_dims = var_3176_keep_dims_0, x = var_3174_cast_fp16)[name = string("op_3176_cast_fp16")]; + fp16 var_3177_to_fp16 = const()[name = string("op_3177_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_133_cast_fp16 = add(x = var_3176_cast_fp16, y = var_3177_to_fp16)[name = string("mean_squared_133_cast_fp16")]; + tensor var_3179_cast_fp16 = pow(x = mean_squared_133_cast_fp16, y = var_27_to_fp16)[name = string("op_3179_cast_fp16")]; + tensor normed_output_247_cast_fp16 = mul(x = hidden_states_545_cast_fp16, y = var_3179_cast_fp16)[name = string("normed_output_247_cast_fp16")]; + tensor hidden_states_551_perm_0 = const()[name = string("hidden_states_551_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(true)]; + bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_83 = transpose(perm = transpose_83_perm_0, x = key_states_19_cast_fp16)[name = string("transpose_121")]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = query_states_19_cast_fp16)[name = string("transpose_122")]; + tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("matmul_9_cast_fp16")]; + tensor add_9_cast_fp16 = add(x = matmul_9_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_9_cast_fp16")]; + int32 softmax_9_axis_0 = const()[name = string("softmax_9_axis_0"), val = int32(-1)]; + tensor softmax_9_cast_fp16 = softmax(axis = softmax_9_axis_0, x = add_9_cast_fp16)[name = 
string("softmax_9_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor hidden_states_551_cast_fp16 = transpose(perm = hidden_states_551_perm_0, x = normed_output_247_cast_fp16)[name = string("transpose_123")]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = softmax_9_cast_fp16, y = hidden_states_551_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_3184_perm_0 = const()[name = string("op_3184_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3186 = const()[name = string("op_3186"), val = tensor([1, 2304, -1])]; + tensor var_3184_cast_fp16 = transpose(perm = var_3184_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_120")]; + tensor var_3187_cast_fp16 = reshape(shape = var_3186, x = var_3184_cast_fp16)[name = string("op_3187_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.eep+0)]; + fp16 model_vision_tower_encoder_layers_9_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.eap+0)]; + tensor clip_132_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_self_attn_o_proj_input_max_promoted_to_fp16, x = var_3187_cast_fp16)[name = string("clip_132_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209659712)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_9_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_132_cast_fp16)[name = string("linear_67_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3ep+1)]; + fp16 model_vision_tower_encoder_layers_9_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3cp+1)]; + tensor clip_133_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_67_cast_fp16)[name = string("clip_133_cast_fp16")]; + fp16 var_33_promoted_67_to_fp16 = const()[name = string("op_33_promoted_67_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3200_cast_fp16 = pow(x = clip_133_cast_fp16, y = var_33_promoted_67_to_fp16)[name = string("op_3200_cast_fp16")]; + tensor var_3202_axes_0 = const()[name = string("op_3202_axes_0"), val = tensor([-1])]; + bool var_3202_keep_dims_0 = const()[name = string("op_3202_keep_dims_0"), val = bool(true)]; + tensor var_3202_cast_fp16 = reduce_mean(axes = var_3202_axes_0, keep_dims = var_3202_keep_dims_0, x = var_3200_cast_fp16)[name = string("op_3202_cast_fp16")]; + fp16 var_3203_to_fp16 = const()[name = string("op_3203_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_135_cast_fp16 = add(x = var_3202_cast_fp16, y = var_3203_to_fp16)[name = string("mean_squared_135_cast_fp16")]; + tensor var_3205_cast_fp16 = pow(x = mean_squared_135_cast_fp16, y = var_27_to_fp16)[name = string("op_3205_cast_fp16")]; + tensor 
normed_output_249_cast_fp16 = mul(x = clip_133_cast_fp16, y = var_3205_cast_fp16)[name = string("normed_output_249_cast_fp16")]; + tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210839424)))]; + tensor normed_output_251_cast_fp16 = mul(x = normed_output_249_cast_fp16, y = const_243_to_fp16)[name = string("normed_output_251_cast_fp16")]; + tensor hidden_states_563_cast_fp16 = add(x = hidden_states_525_cast_fp16, y = normed_output_251_cast_fp16)[name = string("hidden_states_563_cast_fp16")]; + fp16 var_33_promoted_68_to_fp16 = const()[name = string("op_33_promoted_68_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3213_cast_fp16 = pow(x = hidden_states_563_cast_fp16, y = var_33_promoted_68_to_fp16)[name = string("op_3213_cast_fp16")]; + tensor var_3215_axes_0 = const()[name = string("op_3215_axes_0"), val = tensor([-1])]; + bool var_3215_keep_dims_0 = const()[name = string("op_3215_keep_dims_0"), val = bool(true)]; + tensor var_3215_cast_fp16 = reduce_mean(axes = var_3215_axes_0, keep_dims = var_3215_keep_dims_0, x = var_3213_cast_fp16)[name = string("op_3215_cast_fp16")]; + fp16 var_3216_to_fp16 = const()[name = string("op_3216_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_137_cast_fp16 = add(x = var_3215_cast_fp16, y = var_3216_to_fp16)[name = string("mean_squared_137_cast_fp16")]; + tensor var_3218_cast_fp16 = pow(x = mean_squared_137_cast_fp16, y = var_27_to_fp16)[name = string("op_3218_cast_fp16")]; + tensor normed_output_253_cast_fp16 = mul(x = hidden_states_563_cast_fp16, y = var_3218_cast_fp16)[name = string("normed_output_253_cast_fp16")]; + tensor const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210841024)))]; + tensor normed_output_255_cast_fp16 = mul(x = normed_output_253_cast_fp16, y = const_244_to_fp16)[name = 
string("normed_output_255_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.16p+3)]; + fp16 model_vision_tower_encoder_layers_9_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.14p+3)]; + tensor clip_134_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_255_cast_fp16)[name = string("clip_134_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210842624)))]; + tensor linear_68_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_9_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_134_cast_fp16)[name = string("linear_68_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3p+3)]; + fp16 model_vision_tower_encoder_layers_9_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.2ep+3)]; + tensor clip_135_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_68_cast_fp16)[name = string("clip_135_cast_fp16")]; + string var_3235_mode_0 = 
const()[name = string("op_3235_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3235_cast_fp16 = gelu(mode = var_3235_mode_0, x = clip_135_cast_fp16)[name = string("op_3235_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215561280)))]; + tensor linear_69_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_9_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_134_cast_fp16)[name = string("linear_69_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3p+3)]; + fp16 model_vision_tower_encoder_layers_9_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.2ep+3)]; + tensor clip_137_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_mlp_up_proj_output_max_promoted_to_fp16, x = linear_69_cast_fp16)[name = string("clip_137_cast_fp16")]; + tensor hidden_states_573_cast_fp16 = mul(x = var_3235_cast_fp16, y = clip_137_cast_fp16)[name = string("hidden_states_573_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.cap+4)]; + fp16 model_vision_tower_encoder_layers_9_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.c6p+4)]; + tensor clip_138_cast_fp16 
= clip(alpha = model_vision_tower_encoder_layers_9_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_573_cast_fp16)[name = string("clip_138_cast_fp16")]; + tensor model_vision_tower_encoder_layers_9_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220279936)))]; + tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_9_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_138_cast_fp16)[name = string("linear_70_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_9_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.4ep+3)]; + fp16 model_vision_tower_encoder_layers_9_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_9_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.4ap+3)]; + tensor clip_139_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_9_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_9_mlp_down_proj_output_max_promoted_to_fp16, x = linear_70_cast_fp16)[name = string("clip_139_cast_fp16")]; + fp16 var_33_promoted_69_to_fp16 = const()[name = string("op_33_promoted_69_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3257_cast_fp16 = pow(x = clip_139_cast_fp16, y = var_33_promoted_69_to_fp16)[name = string("op_3257_cast_fp16")]; + tensor var_3259_axes_0 = const()[name = string("op_3259_axes_0"), val = tensor([-1])]; + bool var_3259_keep_dims_0 = const()[name = string("op_3259_keep_dims_0"), val = bool(true)]; + tensor var_3259_cast_fp16 = reduce_mean(axes = var_3259_axes_0, keep_dims = var_3259_keep_dims_0, x = 
var_3257_cast_fp16)[name = string("op_3259_cast_fp16")]; + fp16 var_3260_to_fp16 = const()[name = string("op_3260_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_139_cast_fp16 = add(x = var_3259_cast_fp16, y = var_3260_to_fp16)[name = string("mean_squared_139_cast_fp16")]; + tensor var_3262_cast_fp16 = pow(x = mean_squared_139_cast_fp16, y = var_27_to_fp16)[name = string("op_3262_cast_fp16")]; + tensor normed_output_257_cast_fp16 = mul(x = clip_139_cast_fp16, y = var_3262_cast_fp16)[name = string("normed_output_257_cast_fp16")]; + tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224998592)))]; + tensor normed_output_259_cast_fp16 = mul(x = normed_output_257_cast_fp16, y = const_245_to_fp16)[name = string("normed_output_259_cast_fp16")]; + tensor hidden_states_583_cast_fp16 = add(x = hidden_states_563_cast_fp16, y = normed_output_259_cast_fp16)[name = string("hidden_states_583_cast_fp16")]; + fp16 var_33_promoted_70_to_fp16 = const()[name = string("op_33_promoted_70_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3276_cast_fp16 = pow(x = hidden_states_583_cast_fp16, y = var_33_promoted_70_to_fp16)[name = string("op_3276_cast_fp16")]; + tensor var_3278_axes_0 = const()[name = string("op_3278_axes_0"), val = tensor([-1])]; + bool var_3278_keep_dims_0 = const()[name = string("op_3278_keep_dims_0"), val = bool(true)]; + tensor var_3278_cast_fp16 = reduce_mean(axes = var_3278_axes_0, keep_dims = var_3278_keep_dims_0, x = var_3276_cast_fp16)[name = string("op_3278_cast_fp16")]; + fp16 var_3279_to_fp16 = const()[name = string("op_3279_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_141_cast_fp16 = add(x = var_3278_cast_fp16, y = var_3279_to_fp16)[name = string("mean_squared_141_cast_fp16")]; + tensor var_3281_cast_fp16 = pow(x = mean_squared_141_cast_fp16, y = var_27_to_fp16)[name = string("op_3281_cast_fp16")]; + tensor normed_output_261_cast_fp16 = 
mul(x = hidden_states_583_cast_fp16, y = var_3281_cast_fp16)[name = string("normed_output_261_cast_fp16")]; + tensor const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225000192)))]; + tensor normed_output_263_cast_fp16 = mul(x = normed_output_261_cast_fp16, y = const_246_to_fp16)[name = string("normed_output_263_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.b6p+3)]; + fp16 model_vision_tower_encoder_layers_10_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.b4p+3)]; + tensor clip_140_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_263_cast_fp16)[name = string("clip_140_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225001792)))]; + tensor linear_71_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_10_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_140_cast_fp16)[name = string("linear_71_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.eap+3)]; + fp16 model_vision_tower_encoder_layers_10_self_attn_q_proj_output_max_promoted_to_fp16 
= const()[name = string("model_vision_tower_encoder_layers_10_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.e6p+3)]; + tensor clip_141_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_71_cast_fp16)[name = string("clip_141_cast_fp16")]; + tensor var_3303 = const()[name = string("op_3303"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_591_cast_fp16 = reshape(shape = var_3303, x = clip_141_cast_fp16)[name = string("hidden_states_591_cast_fp16")]; + fp16 var_33_promoted_71_to_fp16 = const()[name = string("op_33_promoted_71_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3307_cast_fp16 = pow(x = hidden_states_591_cast_fp16, y = var_33_promoted_71_to_fp16)[name = string("op_3307_cast_fp16")]; + tensor var_3309_axes_0 = const()[name = string("op_3309_axes_0"), val = tensor([-1])]; + bool var_3309_keep_dims_0 = const()[name = string("op_3309_keep_dims_0"), val = bool(true)]; + tensor var_3309_cast_fp16 = reduce_mean(axes = var_3309_axes_0, keep_dims = var_3309_keep_dims_0, x = var_3307_cast_fp16)[name = string("op_3309_cast_fp16")]; + fp16 var_3310_to_fp16 = const()[name = string("op_3310_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_143_cast_fp16 = add(x = var_3309_cast_fp16, y = var_3310_to_fp16)[name = string("mean_squared_143_cast_fp16")]; + tensor var_3312_cast_fp16 = pow(x = mean_squared_143_cast_fp16, y = var_27_to_fp16)[name = string("op_3312_cast_fp16")]; + tensor normed_output_265_cast_fp16 = mul(x = hidden_states_591_cast_fp16, y = var_3312_cast_fp16)[name = string("normed_output_265_cast_fp16")]; + tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226181504)))]; + tensor normed_output_267_cast_fp16 = mul(x = normed_output_265_cast_fp16, y = 
const_249_to_fp16)[name = string("normed_output_267_cast_fp16")]; + tensor var_3332 = const()[name = string("op_3332"), val = tensor([32, 32])]; + int32 var_3333_axis_0 = const()[name = string("op_3333_axis_0"), val = int32(-1)]; + tensor var_3333_cast_fp16_0, tensor var_3333_cast_fp16_1 = split(axis = var_3333_axis_0, split_sizes = var_3332, x = normed_output_267_cast_fp16)[name = string("op_3333_cast_fp16")]; + tensor var_3336 = const()[name = string("op_3336"), val = tensor([32, 32])]; + int32 var_3337_axis_0 = const()[name = string("op_3337_axis_0"), val = int32(-1)]; + tensor var_3337_0, tensor var_3337_1 = split(axis = var_3337_axis_0, split_sizes = var_3336, x = var_160_cast_fp16)[name = string("op_3337")]; + tensor var_3340 = const()[name = string("op_3340"), val = tensor([32, 32])]; + int32 var_3341_axis_0 = const()[name = string("op_3341_axis_0"), val = int32(-1)]; + tensor var_3341_0, tensor var_3341_1 = split(axis = var_3341_axis_0, split_sizes = var_3340, x = var_163_cast_fp16)[name = string("op_3341")]; + tensor cos_165_axes_0 = const()[name = string("cos_165_axes_0"), val = tensor([2])]; + tensor cos_165 = expand_dims(axes = cos_165_axes_0, x = var_3337_0)[name = string("cos_165")]; + tensor sin_165_axes_0 = const()[name = string("sin_165_axes_0"), val = tensor([2])]; + tensor sin_165 = expand_dims(axes = sin_165_axes_0, x = var_3341_0)[name = string("sin_165")]; + tensor var_3346_cast_fp16 = mul(x = var_3333_cast_fp16_0, y = cos_165)[name = string("op_3346_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = var_3333_cast_fp16_0)[name = string("x1_81_cast_fp16")]; + tensor 
x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = var_3333_cast_fp16_0)[name = string("x2_81_cast_fp16")]; + fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3357_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_3357_cast_fp16")]; + bool var_3359_interleave_0 = const()[name = string("op_3359_interleave_0"), val = bool(false)]; + tensor var_3359_cast_fp16 = concat(axis = var_38, interleave = var_3359_interleave_0, values = (var_3357_cast_fp16, x1_81_cast_fp16))[name = string("op_3359_cast_fp16")]; + tensor var_3360_cast_fp16 = mul(x = var_3359_cast_fp16, y = sin_165)[name = string("op_3360_cast_fp16")]; + tensor var_3361_cast_fp16 = add(x = var_3346_cast_fp16, y = var_3360_cast_fp16)[name = string("op_3361_cast_fp16")]; + tensor cos_169_axes_0 = const()[name = string("cos_169_axes_0"), val = tensor([2])]; + tensor cos_169 = expand_dims(axes = cos_169_axes_0, x = var_3337_1)[name = string("cos_169")]; + tensor sin_169_axes_0 = const()[name = string("sin_169_axes_0"), val = tensor([2])]; + tensor sin_169 = expand_dims(axes = sin_169_axes_0, x = var_3341_1)[name = string("sin_169")]; + tensor var_3364_cast_fp16 = mul(x = var_3333_cast_fp16_1, y = cos_169)[name = string("op_3364_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = var_3333_cast_fp16_1)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = var_3333_cast_fp16_1)[name = string("x2_83_cast_fp16")]; + fp16 const_257_promoted_to_fp16 = const()[name = string("const_257_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3375_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_257_promoted_to_fp16)[name = string("op_3375_cast_fp16")]; + bool var_3377_interleave_0 = const()[name = string("op_3377_interleave_0"), val = bool(false)]; + tensor var_3377_cast_fp16 = concat(axis = var_38, interleave = var_3377_interleave_0, values = (var_3375_cast_fp16, x1_83_cast_fp16))[name = string("op_3377_cast_fp16")]; + tensor var_3378_cast_fp16 = mul(x = var_3377_cast_fp16, y = sin_169)[name = string("op_3378_cast_fp16")]; + tensor var_3379_cast_fp16 = add(x = var_3364_cast_fp16, y = var_3378_cast_fp16)[name = string("op_3379_cast_fp16")]; + bool query_states_21_interleave_0 = const()[name = string("query_states_21_interleave_0"), val = bool(false)]; + tensor query_states_21_cast_fp16 = concat(axis = var_38, interleave = query_states_21_interleave_0, values = (var_3361_cast_fp16, var_3379_cast_fp16))[name = string("query_states_21_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226181696)))]; + 
tensor linear_72_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_10_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_140_cast_fp16)[name = string("linear_72_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.52p+4)]; + fp16 model_vision_tower_encoder_layers_10_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5p+4)]; + tensor clip_143_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_72_cast_fp16)[name = string("clip_143_cast_fp16")]; + tensor var_3392 = const()[name = string("op_3392"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_597_cast_fp16 = reshape(shape = var_3392, x = clip_143_cast_fp16)[name = string("hidden_states_597_cast_fp16")]; + fp16 var_33_promoted_72_to_fp16 = const()[name = string("op_33_promoted_72_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3396_cast_fp16 = pow(x = hidden_states_597_cast_fp16, y = var_33_promoted_72_to_fp16)[name = string("op_3396_cast_fp16")]; + tensor var_3398_axes_0 = const()[name = string("op_3398_axes_0"), val = tensor([-1])]; + bool var_3398_keep_dims_0 = const()[name = string("op_3398_keep_dims_0"), val = bool(true)]; + tensor var_3398_cast_fp16 = reduce_mean(axes = var_3398_axes_0, keep_dims = var_3398_keep_dims_0, x = var_3396_cast_fp16)[name = string("op_3398_cast_fp16")]; + fp16 var_3399_to_fp16 = const()[name = string("op_3399_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_145_cast_fp16 = add(x = var_3398_cast_fp16, y = var_3399_to_fp16)[name = string("mean_squared_145_cast_fp16")]; + tensor 
var_3401_cast_fp16 = pow(x = mean_squared_145_cast_fp16, y = var_27_to_fp16)[name = string("op_3401_cast_fp16")]; + tensor normed_output_269_cast_fp16 = mul(x = hidden_states_597_cast_fp16, y = var_3401_cast_fp16)[name = string("normed_output_269_cast_fp16")]; + tensor const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227361408)))]; + tensor normed_output_271_cast_fp16 = mul(x = normed_output_269_cast_fp16, y = const_258_to_fp16)[name = string("normed_output_271_cast_fp16")]; + tensor var_3421 = const()[name = string("op_3421"), val = tensor([32, 32])]; + int32 var_3422_axis_0 = const()[name = string("op_3422_axis_0"), val = int32(-1)]; + tensor var_3422_cast_fp16_0, tensor var_3422_cast_fp16_1 = split(axis = var_3422_axis_0, split_sizes = var_3421, x = normed_output_271_cast_fp16)[name = string("op_3422_cast_fp16")]; + tensor var_3425 = const()[name = string("op_3425"), val = tensor([32, 32])]; + int32 var_3426_axis_0 = const()[name = string("op_3426_axis_0"), val = int32(-1)]; + tensor var_3426_0, tensor var_3426_1 = split(axis = var_3426_axis_0, split_sizes = var_3425, x = var_160_cast_fp16)[name = string("op_3426")]; + tensor var_3429 = const()[name = string("op_3429"), val = tensor([32, 32])]; + int32 var_3430_axis_0 = const()[name = string("op_3430_axis_0"), val = int32(-1)]; + tensor var_3430_0, tensor var_3430_1 = split(axis = var_3430_axis_0, split_sizes = var_3429, x = var_163_cast_fp16)[name = string("op_3430")]; + tensor cos_173_axes_0 = const()[name = string("cos_173_axes_0"), val = tensor([2])]; + tensor cos_173 = expand_dims(axes = cos_173_axes_0, x = var_3426_0)[name = string("cos_173")]; + tensor sin_173_axes_0 = const()[name = string("sin_173_axes_0"), val = tensor([2])]; + tensor sin_173 = expand_dims(axes = sin_173_axes_0, x = var_3430_0)[name = string("sin_173")]; + tensor var_3435_cast_fp16 = mul(x = var_3422_cast_fp16_0, y = cos_173)[name = 
string("op_3435_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = var_3422_cast_fp16_0)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = var_3422_cast_fp16_0)[name = string("x2_85_cast_fp16")]; + fp16 const_263_promoted_to_fp16 = const()[name = string("const_263_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3446_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_263_promoted_to_fp16)[name = string("op_3446_cast_fp16")]; + bool var_3448_interleave_0 = const()[name = string("op_3448_interleave_0"), val = bool(false)]; + tensor var_3448_cast_fp16 = concat(axis = var_38, interleave = var_3448_interleave_0, values = (var_3446_cast_fp16, x1_85_cast_fp16))[name = string("op_3448_cast_fp16")]; + tensor var_3449_cast_fp16 = mul(x = var_3448_cast_fp16, y = sin_173)[name = string("op_3449_cast_fp16")]; + tensor var_3450_cast_fp16 = add(x = var_3435_cast_fp16, y = var_3449_cast_fp16)[name = string("op_3450_cast_fp16")]; + tensor cos_177_axes_0 = const()[name = string("cos_177_axes_0"), val = tensor([2])]; + tensor cos_177 = expand_dims(axes = cos_177_axes_0, x = var_3426_1)[name = string("cos_177")]; + tensor sin_177_axes_0 = const()[name = string("sin_177_axes_0"), val = tensor([2])]; + tensor 
sin_177 = expand_dims(axes = sin_177_axes_0, x = var_3430_1)[name = string("sin_177")]; + tensor var_3453_cast_fp16 = mul(x = var_3422_cast_fp16_1, y = cos_177)[name = string("op_3453_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = var_3422_cast_fp16_1)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = var_3422_cast_fp16_1)[name = string("x2_87_cast_fp16")]; + fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3464_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_3464_cast_fp16")]; + bool var_3466_interleave_0 = const()[name = string("op_3466_interleave_0"), val = bool(false)]; + tensor var_3466_cast_fp16 = concat(axis = var_38, interleave = var_3466_interleave_0, values = (var_3464_cast_fp16, x1_87_cast_fp16))[name = string("op_3466_cast_fp16")]; + tensor var_3467_cast_fp16 = mul(x = var_3466_cast_fp16, y = sin_177)[name = string("op_3467_cast_fp16")]; + tensor var_3468_cast_fp16 = add(x = var_3453_cast_fp16, y = var_3467_cast_fp16)[name = string("op_3468_cast_fp16")]; + bool key_states_21_interleave_0 = const()[name = string("key_states_21_interleave_0"), val = bool(false)]; + 
tensor key_states_21_cast_fp16 = concat(axis = var_38, interleave = key_states_21_interleave_0, values = (var_3450_cast_fp16, var_3468_cast_fp16))[name = string("key_states_21_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227361600)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_10_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_140_cast_fp16)[name = string("linear_73_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.52p+4)]; + fp16 model_vision_tower_encoder_layers_10_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5p+4)]; + tensor clip_145_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_73_cast_fp16)[name = string("clip_145_cast_fp16")]; + tensor var_3481 = const()[name = string("op_3481"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_603_cast_fp16 = reshape(shape = var_3481, x = clip_145_cast_fp16)[name = string("hidden_states_603_cast_fp16")]; + fp16 var_33_promoted_73_to_fp16 = const()[name = string("op_33_promoted_73_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3484_cast_fp16 = pow(x = hidden_states_603_cast_fp16, y = var_33_promoted_73_to_fp16)[name = string("op_3484_cast_fp16")]; + tensor var_3486_axes_0 = const()[name = string("op_3486_axes_0"), val = 
tensor([-1])]; + bool var_3486_keep_dims_0 = const()[name = string("op_3486_keep_dims_0"), val = bool(true)]; + tensor var_3486_cast_fp16 = reduce_mean(axes = var_3486_axes_0, keep_dims = var_3486_keep_dims_0, x = var_3484_cast_fp16)[name = string("op_3486_cast_fp16")]; + fp16 var_3487_to_fp16 = const()[name = string("op_3487_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_147_cast_fp16 = add(x = var_3486_cast_fp16, y = var_3487_to_fp16)[name = string("mean_squared_147_cast_fp16")]; + tensor var_3489_cast_fp16 = pow(x = mean_squared_147_cast_fp16, y = var_27_to_fp16)[name = string("op_3489_cast_fp16")]; + tensor normed_output_273_cast_fp16 = mul(x = hidden_states_603_cast_fp16, y = var_3489_cast_fp16)[name = string("normed_output_273_cast_fp16")]; + tensor hidden_states_609_perm_0 = const()[name = string("hidden_states_609_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(true)]; + bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; + tensor transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_85 = transpose(perm = transpose_85_perm_0, x = key_states_21_cast_fp16)[name = string("transpose_117")]; + tensor transpose_84 = transpose(perm = transpose_84_perm_0, x = query_states_21_cast_fp16)[name = string("transpose_118")]; + tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("matmul_10_cast_fp16")]; + tensor add_10_cast_fp16 = add(x = matmul_10_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_10_cast_fp16")]; + int32 softmax_10_axis_0 = const()[name = string("softmax_10_axis_0"), val = int32(-1)]; + tensor softmax_10_cast_fp16 = 
softmax(axis = softmax_10_axis_0, x = add_10_cast_fp16)[name = string("softmax_10_cast_fp16")]; + bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; + bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; + tensor hidden_states_609_cast_fp16 = transpose(perm = hidden_states_609_perm_0, x = normed_output_273_cast_fp16)[name = string("transpose_119")]; + tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = softmax_10_cast_fp16, y = hidden_states_609_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_3494_perm_0 = const()[name = string("op_3494_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3496 = const()[name = string("op_3496"), val = tensor([1, 2304, -1])]; + tensor var_3494_cast_fp16 = transpose(perm = var_3494_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_116")]; + tensor var_3497_cast_fp16 = reshape(shape = var_3496, x = var_3494_cast_fp16)[name = string("op_3497_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.f2p+0)]; + fp16 model_vision_tower_encoder_layers_10_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.fp+0)]; + tensor clip_146_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_self_attn_o_proj_input_max_promoted_to_fp16, x = var_3497_cast_fp16)[name = string("clip_146_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_10_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228541312)))]; + tensor linear_74_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_10_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_146_cast_fp16)[name = string("linear_74_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3p+1)]; + fp16 model_vision_tower_encoder_layers_10_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.2ep+1)]; + tensor clip_147_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_74_cast_fp16)[name = string("clip_147_cast_fp16")]; + fp16 var_33_promoted_74_to_fp16 = const()[name = string("op_33_promoted_74_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3510_cast_fp16 = pow(x = clip_147_cast_fp16, y = var_33_promoted_74_to_fp16)[name = string("op_3510_cast_fp16")]; + tensor var_3512_axes_0 = const()[name = string("op_3512_axes_0"), val = tensor([-1])]; + bool var_3512_keep_dims_0 = const()[name = string("op_3512_keep_dims_0"), val = bool(true)]; + tensor var_3512_cast_fp16 = reduce_mean(axes = var_3512_axes_0, keep_dims = var_3512_keep_dims_0, x = var_3510_cast_fp16)[name = string("op_3512_cast_fp16")]; + fp16 var_3513_to_fp16 = const()[name = string("op_3513_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_149_cast_fp16 = add(x = var_3512_cast_fp16, y = var_3513_to_fp16)[name = string("mean_squared_149_cast_fp16")]; + tensor var_3515_cast_fp16 = pow(x = 
mean_squared_149_cast_fp16, y = var_27_to_fp16)[name = string("op_3515_cast_fp16")]; + tensor normed_output_275_cast_fp16 = mul(x = clip_147_cast_fp16, y = var_3515_cast_fp16)[name = string("normed_output_275_cast_fp16")]; + tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229721024)))]; + tensor normed_output_277_cast_fp16 = mul(x = normed_output_275_cast_fp16, y = const_267_to_fp16)[name = string("normed_output_277_cast_fp16")]; + tensor hidden_states_621_cast_fp16 = add(x = hidden_states_583_cast_fp16, y = normed_output_277_cast_fp16)[name = string("hidden_states_621_cast_fp16")]; + fp16 var_33_promoted_75_to_fp16 = const()[name = string("op_33_promoted_75_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3523_cast_fp16 = pow(x = hidden_states_621_cast_fp16, y = var_33_promoted_75_to_fp16)[name = string("op_3523_cast_fp16")]; + tensor var_3525_axes_0 = const()[name = string("op_3525_axes_0"), val = tensor([-1])]; + bool var_3525_keep_dims_0 = const()[name = string("op_3525_keep_dims_0"), val = bool(true)]; + tensor var_3525_cast_fp16 = reduce_mean(axes = var_3525_axes_0, keep_dims = var_3525_keep_dims_0, x = var_3523_cast_fp16)[name = string("op_3525_cast_fp16")]; + fp16 var_3526_to_fp16 = const()[name = string("op_3526_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_151_cast_fp16 = add(x = var_3525_cast_fp16, y = var_3526_to_fp16)[name = string("mean_squared_151_cast_fp16")]; + tensor var_3528_cast_fp16 = pow(x = mean_squared_151_cast_fp16, y = var_27_to_fp16)[name = string("op_3528_cast_fp16")]; + tensor normed_output_279_cast_fp16 = mul(x = hidden_states_621_cast_fp16, y = var_3528_cast_fp16)[name = string("normed_output_279_cast_fp16")]; + tensor const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229722624)))]; + tensor normed_output_281_cast_fp16 = 
mul(x = normed_output_279_cast_fp16, y = const_268_to_fp16)[name = string("normed_output_281_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.1ep+3)]; + fp16 model_vision_tower_encoder_layers_10_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.1cp+3)]; + tensor clip_148_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_281_cast_fp16)[name = string("clip_148_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229724224)))]; + tensor linear_75_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_10_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_148_cast_fp16)[name = string("linear_75_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.38p+3)]; + fp16 model_vision_tower_encoder_layers_10_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.36p+3)]; + tensor clip_149_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_mlp_gate_proj_output_max_promoted_to_fp16, x = 
linear_75_cast_fp16)[name = string("clip_149_cast_fp16")]; + string var_3545_mode_0 = const()[name = string("op_3545_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3545_cast_fp16 = gelu(mode = var_3545_mode_0, x = clip_149_cast_fp16)[name = string("op_3545_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234442880)))]; + tensor linear_76_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_10_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_148_cast_fp16)[name = string("linear_76_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.38p+3)]; + fp16 model_vision_tower_encoder_layers_10_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.36p+3)]; + tensor clip_151_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_mlp_up_proj_output_max_promoted_to_fp16, x = linear_76_cast_fp16)[name = string("clip_151_cast_fp16")]; + tensor hidden_states_631_cast_fp16 = mul(x = var_3545_cast_fp16, y = clip_151_cast_fp16)[name = string("hidden_states_631_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.28p+4)]; + fp16 model_vision_tower_encoder_layers_10_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_10_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.26p+4)]; + tensor clip_152_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_631_cast_fp16)[name = string("clip_152_cast_fp16")]; + tensor model_vision_tower_encoder_layers_10_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239161536)))]; + tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_10_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_152_cast_fp16)[name = string("linear_77_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_10_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.66p+2)]; + fp16 model_vision_tower_encoder_layers_10_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_10_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.64p+2)]; + tensor clip_153_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_10_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_10_mlp_down_proj_output_max_promoted_to_fp16, x = linear_77_cast_fp16)[name = string("clip_153_cast_fp16")]; + fp16 var_33_promoted_76_to_fp16 = const()[name = string("op_33_promoted_76_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3567_cast_fp16 = pow(x = clip_153_cast_fp16, y = var_33_promoted_76_to_fp16)[name = string("op_3567_cast_fp16")]; + tensor var_3569_axes_0 = const()[name = string("op_3569_axes_0"), val = tensor([-1])]; + bool var_3569_keep_dims_0 = const()[name = 
string("op_3569_keep_dims_0"), val = bool(true)]; + tensor var_3569_cast_fp16 = reduce_mean(axes = var_3569_axes_0, keep_dims = var_3569_keep_dims_0, x = var_3567_cast_fp16)[name = string("op_3569_cast_fp16")]; + fp16 var_3570_to_fp16 = const()[name = string("op_3570_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_153_cast_fp16 = add(x = var_3569_cast_fp16, y = var_3570_to_fp16)[name = string("mean_squared_153_cast_fp16")]; + tensor var_3572_cast_fp16 = pow(x = mean_squared_153_cast_fp16, y = var_27_to_fp16)[name = string("op_3572_cast_fp16")]; + tensor normed_output_283_cast_fp16 = mul(x = clip_153_cast_fp16, y = var_3572_cast_fp16)[name = string("normed_output_283_cast_fp16")]; + tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243880192)))]; + tensor normed_output_285_cast_fp16 = mul(x = normed_output_283_cast_fp16, y = const_269_to_fp16)[name = string("normed_output_285_cast_fp16")]; + tensor hidden_states_641_cast_fp16 = add(x = hidden_states_621_cast_fp16, y = normed_output_285_cast_fp16)[name = string("hidden_states_641_cast_fp16")]; + fp16 var_33_promoted_77_to_fp16 = const()[name = string("op_33_promoted_77_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3586_cast_fp16 = pow(x = hidden_states_641_cast_fp16, y = var_33_promoted_77_to_fp16)[name = string("op_3586_cast_fp16")]; + tensor var_3588_axes_0 = const()[name = string("op_3588_axes_0"), val = tensor([-1])]; + bool var_3588_keep_dims_0 = const()[name = string("op_3588_keep_dims_0"), val = bool(true)]; + tensor var_3588_cast_fp16 = reduce_mean(axes = var_3588_axes_0, keep_dims = var_3588_keep_dims_0, x = var_3586_cast_fp16)[name = string("op_3588_cast_fp16")]; + fp16 var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_155_cast_fp16 = add(x = var_3588_cast_fp16, y = var_3589_to_fp16)[name = string("mean_squared_155_cast_fp16")]; + tensor 
var_3591_cast_fp16 = pow(x = mean_squared_155_cast_fp16, y = var_27_to_fp16)[name = string("op_3591_cast_fp16")]; + tensor normed_output_287_cast_fp16 = mul(x = hidden_states_641_cast_fp16, y = var_3591_cast_fp16)[name = string("normed_output_287_cast_fp16")]; + tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243881792)))]; + tensor normed_output_289_cast_fp16 = mul(x = normed_output_287_cast_fp16, y = const_270_to_fp16)[name = string("normed_output_289_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.7cp+3)]; + fp16 model_vision_tower_encoder_layers_11_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.78p+3)]; + tensor clip_154_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_289_cast_fp16)[name = string("clip_154_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243883392)))]; + tensor linear_78_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_11_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_154_cast_fp16)[name = string("linear_78_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_11_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.dp+3)]; + fp16 model_vision_tower_encoder_layers_11_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.ccp+3)]; + tensor clip_155_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_78_cast_fp16)[name = string("clip_155_cast_fp16")]; + tensor var_3613 = const()[name = string("op_3613"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_649_cast_fp16 = reshape(shape = var_3613, x = clip_155_cast_fp16)[name = string("hidden_states_649_cast_fp16")]; + fp16 var_33_promoted_78_to_fp16 = const()[name = string("op_33_promoted_78_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3617_cast_fp16 = pow(x = hidden_states_649_cast_fp16, y = var_33_promoted_78_to_fp16)[name = string("op_3617_cast_fp16")]; + tensor var_3619_axes_0 = const()[name = string("op_3619_axes_0"), val = tensor([-1])]; + bool var_3619_keep_dims_0 = const()[name = string("op_3619_keep_dims_0"), val = bool(true)]; + tensor var_3619_cast_fp16 = reduce_mean(axes = var_3619_axes_0, keep_dims = var_3619_keep_dims_0, x = var_3617_cast_fp16)[name = string("op_3619_cast_fp16")]; + fp16 var_3620_to_fp16 = const()[name = string("op_3620_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_157_cast_fp16 = add(x = var_3619_cast_fp16, y = var_3620_to_fp16)[name = string("mean_squared_157_cast_fp16")]; + tensor var_3622_cast_fp16 = pow(x = mean_squared_157_cast_fp16, y = var_27_to_fp16)[name = string("op_3622_cast_fp16")]; + tensor normed_output_291_cast_fp16 = mul(x = hidden_states_649_cast_fp16, y = var_3622_cast_fp16)[name = string("normed_output_291_cast_fp16")]; + tensor const_273_to_fp16 = const()[name = 
string("const_273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245063104)))]; + tensor normed_output_293_cast_fp16 = mul(x = normed_output_291_cast_fp16, y = const_273_to_fp16)[name = string("normed_output_293_cast_fp16")]; + tensor var_3642 = const()[name = string("op_3642"), val = tensor([32, 32])]; + int32 var_3643_axis_0 = const()[name = string("op_3643_axis_0"), val = int32(-1)]; + tensor var_3643_cast_fp16_0, tensor var_3643_cast_fp16_1 = split(axis = var_3643_axis_0, split_sizes = var_3642, x = normed_output_293_cast_fp16)[name = string("op_3643_cast_fp16")]; + tensor var_3646 = const()[name = string("op_3646"), val = tensor([32, 32])]; + int32 var_3647_axis_0 = const()[name = string("op_3647_axis_0"), val = int32(-1)]; + tensor var_3647_0, tensor var_3647_1 = split(axis = var_3647_axis_0, split_sizes = var_3646, x = var_160_cast_fp16)[name = string("op_3647")]; + tensor var_3650 = const()[name = string("op_3650"), val = tensor([32, 32])]; + int32 var_3651_axis_0 = const()[name = string("op_3651_axis_0"), val = int32(-1)]; + tensor var_3651_0, tensor var_3651_1 = split(axis = var_3651_axis_0, split_sizes = var_3650, x = var_163_cast_fp16)[name = string("op_3651")]; + tensor cos_181_axes_0 = const()[name = string("cos_181_axes_0"), val = tensor([2])]; + tensor cos_181 = expand_dims(axes = cos_181_axes_0, x = var_3647_0)[name = string("cos_181")]; + tensor sin_181_axes_0 = const()[name = string("sin_181_axes_0"), val = tensor([2])]; + tensor sin_181 = expand_dims(axes = sin_181_axes_0, x = var_3651_0)[name = string("sin_181")]; + tensor var_3656_cast_fp16 = mul(x = var_3643_cast_fp16_0, y = cos_181)[name = string("op_3656_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = var_3643_cast_fp16_0)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = var_3643_cast_fp16_0)[name = string("x2_89_cast_fp16")]; + fp16 const_278_promoted_to_fp16 = const()[name = string("const_278_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3667_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_278_promoted_to_fp16)[name = string("op_3667_cast_fp16")]; + bool var_3669_interleave_0 = const()[name = string("op_3669_interleave_0"), val = bool(false)]; + tensor var_3669_cast_fp16 = concat(axis = var_38, interleave = var_3669_interleave_0, values = (var_3667_cast_fp16, x1_89_cast_fp16))[name = string("op_3669_cast_fp16")]; + tensor var_3670_cast_fp16 = mul(x = var_3669_cast_fp16, y = sin_181)[name = string("op_3670_cast_fp16")]; + tensor var_3671_cast_fp16 = add(x = var_3656_cast_fp16, y = var_3670_cast_fp16)[name = string("op_3671_cast_fp16")]; + tensor cos_185_axes_0 = const()[name = string("cos_185_axes_0"), val = tensor([2])]; + tensor cos_185 = expand_dims(axes = cos_185_axes_0, x = var_3647_1)[name = string("cos_185")]; + tensor sin_185_axes_0 = const()[name = string("sin_185_axes_0"), val = tensor([2])]; + tensor sin_185 = expand_dims(axes = sin_185_axes_0, x = var_3651_1)[name = string("sin_185")]; + tensor var_3674_cast_fp16 = mul(x = var_3643_cast_fp16_1, y = cos_185)[name = string("op_3674_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor 
x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = var_3643_cast_fp16_1)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = var_3643_cast_fp16_1)[name = string("x2_91_cast_fp16")]; + fp16 const_281_promoted_to_fp16 = const()[name = string("const_281_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3685_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_281_promoted_to_fp16)[name = string("op_3685_cast_fp16")]; + bool var_3687_interleave_0 = const()[name = string("op_3687_interleave_0"), val = bool(false)]; + tensor var_3687_cast_fp16 = concat(axis = var_38, interleave = var_3687_interleave_0, values = (var_3685_cast_fp16, x1_91_cast_fp16))[name = string("op_3687_cast_fp16")]; + tensor var_3688_cast_fp16 = mul(x = var_3687_cast_fp16, y = sin_185)[name = string("op_3688_cast_fp16")]; + tensor var_3689_cast_fp16 = add(x = var_3674_cast_fp16, y = var_3688_cast_fp16)[name = string("op_3689_cast_fp16")]; + bool query_states_23_interleave_0 = const()[name = string("query_states_23_interleave_0"), val = bool(false)]; + tensor query_states_23_cast_fp16 = concat(axis = var_38, interleave = query_states_23_interleave_0, values = (var_3671_cast_fp16, var_3689_cast_fp16))[name = string("query_states_23_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_self_attn_k_proj_linear_weight_promoted_to_fp16 = 
const()[name = string("model_vision_tower_encoder_layers_11_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245063296)))]; + tensor linear_79_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_11_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_154_cast_fp16)[name = string("linear_79_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.bap+3)]; + fp16 model_vision_tower_encoder_layers_11_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.b8p+3)]; + tensor clip_157_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_79_cast_fp16)[name = string("clip_157_cast_fp16")]; + tensor var_3702 = const()[name = string("op_3702"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_655_cast_fp16 = reshape(shape = var_3702, x = clip_157_cast_fp16)[name = string("hidden_states_655_cast_fp16")]; + fp16 var_33_promoted_79_to_fp16 = const()[name = string("op_33_promoted_79_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3706_cast_fp16 = pow(x = hidden_states_655_cast_fp16, y = var_33_promoted_79_to_fp16)[name = string("op_3706_cast_fp16")]; + tensor var_3708_axes_0 = const()[name = string("op_3708_axes_0"), val = tensor([-1])]; + bool var_3708_keep_dims_0 = const()[name = string("op_3708_keep_dims_0"), val = bool(true)]; + tensor var_3708_cast_fp16 = reduce_mean(axes = var_3708_axes_0, keep_dims = var_3708_keep_dims_0, x = var_3706_cast_fp16)[name = string("op_3708_cast_fp16")]; + fp16 var_3709_to_fp16 = 
const()[name = string("op_3709_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_159_cast_fp16 = add(x = var_3708_cast_fp16, y = var_3709_to_fp16)[name = string("mean_squared_159_cast_fp16")]; + tensor var_3711_cast_fp16 = pow(x = mean_squared_159_cast_fp16, y = var_27_to_fp16)[name = string("op_3711_cast_fp16")]; + tensor normed_output_295_cast_fp16 = mul(x = hidden_states_655_cast_fp16, y = var_3711_cast_fp16)[name = string("normed_output_295_cast_fp16")]; + tensor const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246243008)))]; + tensor normed_output_297_cast_fp16 = mul(x = normed_output_295_cast_fp16, y = const_282_to_fp16)[name = string("normed_output_297_cast_fp16")]; + tensor var_3731 = const()[name = string("op_3731"), val = tensor([32, 32])]; + int32 var_3732_axis_0 = const()[name = string("op_3732_axis_0"), val = int32(-1)]; + tensor var_3732_cast_fp16_0, tensor var_3732_cast_fp16_1 = split(axis = var_3732_axis_0, split_sizes = var_3731, x = normed_output_297_cast_fp16)[name = string("op_3732_cast_fp16")]; + tensor var_3735 = const()[name = string("op_3735"), val = tensor([32, 32])]; + int32 var_3736_axis_0 = const()[name = string("op_3736_axis_0"), val = int32(-1)]; + tensor var_3736_0, tensor var_3736_1 = split(axis = var_3736_axis_0, split_sizes = var_3735, x = var_160_cast_fp16)[name = string("op_3736")]; + tensor var_3739 = const()[name = string("op_3739"), val = tensor([32, 32])]; + int32 var_3740_axis_0 = const()[name = string("op_3740_axis_0"), val = int32(-1)]; + tensor var_3740_0, tensor var_3740_1 = split(axis = var_3740_axis_0, split_sizes = var_3739, x = var_163_cast_fp16)[name = string("op_3740")]; + tensor cos_189_axes_0 = const()[name = string("cos_189_axes_0"), val = tensor([2])]; + tensor cos_189 = expand_dims(axes = cos_189_axes_0, x = var_3736_0)[name = string("cos_189")]; + tensor sin_189_axes_0 = const()[name = 
string("sin_189_axes_0"), val = tensor([2])]; + tensor sin_189 = expand_dims(axes = sin_189_axes_0, x = var_3740_0)[name = string("sin_189")]; + tensor var_3745_cast_fp16 = mul(x = var_3732_cast_fp16_0, y = cos_189)[name = string("op_3745_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = var_3732_cast_fp16_0)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = var_3732_cast_fp16_0)[name = string("x2_93_cast_fp16")]; + fp16 const_287_promoted_to_fp16 = const()[name = string("const_287_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3756_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_287_promoted_to_fp16)[name = string("op_3756_cast_fp16")]; + bool var_3758_interleave_0 = const()[name = string("op_3758_interleave_0"), val = bool(false)]; + tensor var_3758_cast_fp16 = concat(axis = var_38, interleave = var_3758_interleave_0, values = (var_3756_cast_fp16, x1_93_cast_fp16))[name = string("op_3758_cast_fp16")]; + tensor var_3759_cast_fp16 = mul(x = var_3758_cast_fp16, y = sin_189)[name = string("op_3759_cast_fp16")]; + tensor var_3760_cast_fp16 = add(x = var_3745_cast_fp16, y = var_3759_cast_fp16)[name = string("op_3760_cast_fp16")]; + tensor cos_193_axes_0 = const()[name = 
string("cos_193_axes_0"), val = tensor([2])]; + tensor cos_193 = expand_dims(axes = cos_193_axes_0, x = var_3736_1)[name = string("cos_193")]; + tensor sin_193_axes_0 = const()[name = string("sin_193_axes_0"), val = tensor([2])]; + tensor sin_193 = expand_dims(axes = sin_193_axes_0, x = var_3740_1)[name = string("sin_193")]; + tensor var_3763_cast_fp16 = mul(x = var_3732_cast_fp16_1, y = cos_193)[name = string("op_3763_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = var_3732_cast_fp16_1)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = var_3732_cast_fp16_1)[name = string("x2_95_cast_fp16")]; + fp16 const_290_promoted_to_fp16 = const()[name = string("const_290_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3774_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_290_promoted_to_fp16)[name = string("op_3774_cast_fp16")]; + bool var_3776_interleave_0 = const()[name = string("op_3776_interleave_0"), val = bool(false)]; + tensor var_3776_cast_fp16 = concat(axis = var_38, interleave = var_3776_interleave_0, values = (var_3774_cast_fp16, x1_95_cast_fp16))[name = string("op_3776_cast_fp16")]; + tensor var_3777_cast_fp16 = mul(x = var_3776_cast_fp16, y = sin_193)[name = 
string("op_3777_cast_fp16")]; + tensor var_3778_cast_fp16 = add(x = var_3763_cast_fp16, y = var_3777_cast_fp16)[name = string("op_3778_cast_fp16")]; + bool key_states_23_interleave_0 = const()[name = string("key_states_23_interleave_0"), val = bool(false)]; + tensor key_states_23_cast_fp16 = concat(axis = var_38, interleave = key_states_23_interleave_0, values = (var_3760_cast_fp16, var_3778_cast_fp16))[name = string("key_states_23_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246243200)))]; + tensor linear_80_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_11_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_154_cast_fp16)[name = string("linear_80_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.bap+3)]; + fp16 model_vision_tower_encoder_layers_11_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.b8p+3)]; + tensor clip_159_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_80_cast_fp16)[name = string("clip_159_cast_fp16")]; + tensor var_3791 = const()[name = string("op_3791"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_661_cast_fp16 = reshape(shape = var_3791, x = clip_159_cast_fp16)[name = string("hidden_states_661_cast_fp16")]; + fp16 var_33_promoted_80_to_fp16 = const()[name = 
string("op_33_promoted_80_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3794_cast_fp16 = pow(x = hidden_states_661_cast_fp16, y = var_33_promoted_80_to_fp16)[name = string("op_3794_cast_fp16")]; + tensor var_3796_axes_0 = const()[name = string("op_3796_axes_0"), val = tensor([-1])]; + bool var_3796_keep_dims_0 = const()[name = string("op_3796_keep_dims_0"), val = bool(true)]; + tensor var_3796_cast_fp16 = reduce_mean(axes = var_3796_axes_0, keep_dims = var_3796_keep_dims_0, x = var_3794_cast_fp16)[name = string("op_3796_cast_fp16")]; + fp16 var_3797_to_fp16 = const()[name = string("op_3797_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_161_cast_fp16 = add(x = var_3796_cast_fp16, y = var_3797_to_fp16)[name = string("mean_squared_161_cast_fp16")]; + tensor var_3799_cast_fp16 = pow(x = mean_squared_161_cast_fp16, y = var_27_to_fp16)[name = string("op_3799_cast_fp16")]; + tensor normed_output_299_cast_fp16 = mul(x = hidden_states_661_cast_fp16, y = var_3799_cast_fp16)[name = string("normed_output_299_cast_fp16")]; + tensor hidden_states_667_perm_0 = const()[name = string("hidden_states_667_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(true)]; + bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; + tensor transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_87 = transpose(perm = transpose_87_perm_0, x = key_states_23_cast_fp16)[name = string("transpose_113")]; + tensor transpose_86 = transpose(perm = transpose_86_perm_0, x = query_states_23_cast_fp16)[name = string("transpose_114")]; + tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = transpose_86, y = transpose_87)[name = 
string("matmul_11_cast_fp16")]; + tensor add_11_cast_fp16 = add(x = matmul_11_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_11_cast_fp16")]; + int32 softmax_11_axis_0 = const()[name = string("softmax_11_axis_0"), val = int32(-1)]; + tensor softmax_11_cast_fp16 = softmax(axis = softmax_11_axis_0, x = add_11_cast_fp16)[name = string("softmax_11_cast_fp16")]; + bool attn_output_45_transpose_x_0 = const()[name = string("attn_output_45_transpose_x_0"), val = bool(false)]; + bool attn_output_45_transpose_y_0 = const()[name = string("attn_output_45_transpose_y_0"), val = bool(false)]; + tensor hidden_states_667_cast_fp16 = transpose(perm = hidden_states_667_perm_0, x = normed_output_299_cast_fp16)[name = string("transpose_115")]; + tensor attn_output_45_cast_fp16 = matmul(transpose_x = attn_output_45_transpose_x_0, transpose_y = attn_output_45_transpose_y_0, x = softmax_11_cast_fp16, y = hidden_states_667_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3804_perm_0 = const()[name = string("op_3804_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3806 = const()[name = string("op_3806"), val = tensor([1, 2304, -1])]; + tensor var_3804_cast_fp16 = transpose(perm = var_3804_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_112")]; + tensor var_3807_cast_fp16 = reshape(shape = var_3806, x = var_3804_cast_fp16)[name = string("op_3807_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.34p+1)]; + fp16 model_vision_tower_encoder_layers_11_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.32p+1)]; + tensor clip_160_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_self_attn_o_proj_input_min_promoted_to_fp16, beta = 
model_vision_tower_encoder_layers_11_self_attn_o_proj_input_max_promoted_to_fp16, x = var_3807_cast_fp16)[name = string("clip_160_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247422912)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_11_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_160_cast_fp16)[name = string("linear_81_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.dep+1)]; + fp16 model_vision_tower_encoder_layers_11_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.dap+1)]; + tensor clip_161_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_81_cast_fp16)[name = string("clip_161_cast_fp16")]; + fp16 var_33_promoted_81_to_fp16 = const()[name = string("op_33_promoted_81_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3820_cast_fp16 = pow(x = clip_161_cast_fp16, y = var_33_promoted_81_to_fp16)[name = string("op_3820_cast_fp16")]; + tensor var_3822_axes_0 = const()[name = string("op_3822_axes_0"), val = tensor([-1])]; + bool var_3822_keep_dims_0 = const()[name = string("op_3822_keep_dims_0"), val = bool(true)]; + tensor var_3822_cast_fp16 = reduce_mean(axes = var_3822_axes_0, keep_dims = var_3822_keep_dims_0, x = var_3820_cast_fp16)[name = string("op_3822_cast_fp16")]; + fp16 
var_3823_to_fp16 = const()[name = string("op_3823_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_163_cast_fp16 = add(x = var_3822_cast_fp16, y = var_3823_to_fp16)[name = string("mean_squared_163_cast_fp16")]; + tensor var_3825_cast_fp16 = pow(x = mean_squared_163_cast_fp16, y = var_27_to_fp16)[name = string("op_3825_cast_fp16")]; + tensor normed_output_301_cast_fp16 = mul(x = clip_161_cast_fp16, y = var_3825_cast_fp16)[name = string("normed_output_301_cast_fp16")]; + tensor const_291_to_fp16 = const()[name = string("const_291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248602624)))]; + tensor normed_output_303_cast_fp16 = mul(x = normed_output_301_cast_fp16, y = const_291_to_fp16)[name = string("normed_output_303_cast_fp16")]; + tensor hidden_states_679_cast_fp16 = add(x = hidden_states_641_cast_fp16, y = normed_output_303_cast_fp16)[name = string("hidden_states_679_cast_fp16")]; + fp16 var_33_promoted_82_to_fp16 = const()[name = string("op_33_promoted_82_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3833_cast_fp16 = pow(x = hidden_states_679_cast_fp16, y = var_33_promoted_82_to_fp16)[name = string("op_3833_cast_fp16")]; + tensor var_3835_axes_0 = const()[name = string("op_3835_axes_0"), val = tensor([-1])]; + bool var_3835_keep_dims_0 = const()[name = string("op_3835_keep_dims_0"), val = bool(true)]; + tensor var_3835_cast_fp16 = reduce_mean(axes = var_3835_axes_0, keep_dims = var_3835_keep_dims_0, x = var_3833_cast_fp16)[name = string("op_3835_cast_fp16")]; + fp16 var_3836_to_fp16 = const()[name = string("op_3836_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_165_cast_fp16 = add(x = var_3835_cast_fp16, y = var_3836_to_fp16)[name = string("mean_squared_165_cast_fp16")]; + tensor var_3838_cast_fp16 = pow(x = mean_squared_165_cast_fp16, y = var_27_to_fp16)[name = string("op_3838_cast_fp16")]; + tensor normed_output_305_cast_fp16 = mul(x = hidden_states_679_cast_fp16, y = 
var_3838_cast_fp16)[name = string("normed_output_305_cast_fp16")]; + tensor const_292_to_fp16 = const()[name = string("const_292_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248604224)))]; + tensor normed_output_307_cast_fp16 = mul(x = normed_output_305_cast_fp16, y = const_292_to_fp16)[name = string("normed_output_307_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.84p+2)]; + fp16 model_vision_tower_encoder_layers_11_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.82p+2)]; + tensor clip_162_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_307_cast_fp16)[name = string("clip_162_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248605824)))]; + tensor linear_82_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_11_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_162_cast_fp16)[name = string("linear_82_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.8p+2)]; + fp16 model_vision_tower_encoder_layers_11_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_11_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.7ep+2)]; + tensor clip_163_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_82_cast_fp16)[name = string("clip_163_cast_fp16")]; + string var_3855_mode_0 = const()[name = string("op_3855_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_3855_cast_fp16 = gelu(mode = var_3855_mode_0, x = clip_163_cast_fp16)[name = string("op_3855_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253324480)))]; + tensor linear_83_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_11_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_162_cast_fp16)[name = string("linear_83_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.8p+2)]; + fp16 model_vision_tower_encoder_layers_11_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.7ep+2)]; + tensor clip_165_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_mlp_up_proj_output_max_promoted_to_fp16, x = linear_83_cast_fp16)[name = string("clip_165_cast_fp16")]; + tensor hidden_states_689_cast_fp16 = mul(x = var_3855_cast_fp16, y = clip_165_cast_fp16)[name = string("hidden_states_689_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_11_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.2ep+3)]; + fp16 model_vision_tower_encoder_layers_11_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.2cp+3)]; + tensor clip_166_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_689_cast_fp16)[name = string("clip_166_cast_fp16")]; + tensor model_vision_tower_encoder_layers_11_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258043136)))]; + tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_11_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_166_cast_fp16)[name = string("linear_84_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_11_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3cp+1)]; + fp16 model_vision_tower_encoder_layers_11_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_11_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3ap+1)]; + tensor clip_167_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_11_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_11_mlp_down_proj_output_max_promoted_to_fp16, x = linear_84_cast_fp16)[name = string("clip_167_cast_fp16")]; + fp16 var_33_promoted_83_to_fp16 = const()[name = 
string("op_33_promoted_83_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3877_cast_fp16 = pow(x = clip_167_cast_fp16, y = var_33_promoted_83_to_fp16)[name = string("op_3877_cast_fp16")]; + tensor var_3879_axes_0 = const()[name = string("op_3879_axes_0"), val = tensor([-1])]; + bool var_3879_keep_dims_0 = const()[name = string("op_3879_keep_dims_0"), val = bool(true)]; + tensor var_3879_cast_fp16 = reduce_mean(axes = var_3879_axes_0, keep_dims = var_3879_keep_dims_0, x = var_3877_cast_fp16)[name = string("op_3879_cast_fp16")]; + fp16 var_3880_to_fp16 = const()[name = string("op_3880_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_167_cast_fp16 = add(x = var_3879_cast_fp16, y = var_3880_to_fp16)[name = string("mean_squared_167_cast_fp16")]; + tensor var_3882_cast_fp16 = pow(x = mean_squared_167_cast_fp16, y = var_27_to_fp16)[name = string("op_3882_cast_fp16")]; + tensor normed_output_309_cast_fp16 = mul(x = clip_167_cast_fp16, y = var_3882_cast_fp16)[name = string("normed_output_309_cast_fp16")]; + tensor const_293_to_fp16 = const()[name = string("const_293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262761792)))]; + tensor normed_output_311_cast_fp16 = mul(x = normed_output_309_cast_fp16, y = const_293_to_fp16)[name = string("normed_output_311_cast_fp16")]; + tensor hidden_states_699_cast_fp16 = add(x = hidden_states_679_cast_fp16, y = normed_output_311_cast_fp16)[name = string("hidden_states_699_cast_fp16")]; + fp16 var_33_promoted_84_to_fp16 = const()[name = string("op_33_promoted_84_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3896_cast_fp16 = pow(x = hidden_states_699_cast_fp16, y = var_33_promoted_84_to_fp16)[name = string("op_3896_cast_fp16")]; + tensor var_3898_axes_0 = const()[name = string("op_3898_axes_0"), val = tensor([-1])]; + bool var_3898_keep_dims_0 = const()[name = string("op_3898_keep_dims_0"), val = bool(true)]; + tensor var_3898_cast_fp16 = reduce_mean(axes = var_3898_axes_0, keep_dims 
= var_3898_keep_dims_0, x = var_3896_cast_fp16)[name = string("op_3898_cast_fp16")]; + fp16 var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_169_cast_fp16 = add(x = var_3898_cast_fp16, y = var_3899_to_fp16)[name = string("mean_squared_169_cast_fp16")]; + tensor var_3901_cast_fp16 = pow(x = mean_squared_169_cast_fp16, y = var_27_to_fp16)[name = string("op_3901_cast_fp16")]; + tensor normed_output_313_cast_fp16 = mul(x = hidden_states_699_cast_fp16, y = var_3901_cast_fp16)[name = string("normed_output_313_cast_fp16")]; + tensor const_294_to_fp16 = const()[name = string("const_294_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262763392)))]; + tensor normed_output_315_cast_fp16 = mul(x = normed_output_313_cast_fp16, y = const_294_to_fp16)[name = string("normed_output_315_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.cap+3)]; + fp16 model_vision_tower_encoder_layers_12_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.c6p+3)]; + tensor clip_168_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_315_cast_fp16)[name = string("clip_168_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262764992)))]; + tensor linear_85_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_12_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_168_cast_fp16)[name = string("linear_85_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.f8p+3)]; + fp16 model_vision_tower_encoder_layers_12_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.f4p+3)]; + tensor clip_169_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_85_cast_fp16)[name = string("clip_169_cast_fp16")]; + tensor var_3923 = const()[name = string("op_3923"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_707_cast_fp16 = reshape(shape = var_3923, x = clip_169_cast_fp16)[name = string("hidden_states_707_cast_fp16")]; + fp16 var_33_promoted_85_to_fp16 = const()[name = string("op_33_promoted_85_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3927_cast_fp16 = pow(x = hidden_states_707_cast_fp16, y = var_33_promoted_85_to_fp16)[name = string("op_3927_cast_fp16")]; + tensor var_3929_axes_0 = const()[name = string("op_3929_axes_0"), val = tensor([-1])]; + bool var_3929_keep_dims_0 = const()[name = string("op_3929_keep_dims_0"), val = bool(true)]; + tensor var_3929_cast_fp16 = reduce_mean(axes = var_3929_axes_0, keep_dims = var_3929_keep_dims_0, x = var_3927_cast_fp16)[name = string("op_3929_cast_fp16")]; + fp16 var_3930_to_fp16 = const()[name = string("op_3930_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_171_cast_fp16 = add(x = var_3929_cast_fp16, y = var_3930_to_fp16)[name = string("mean_squared_171_cast_fp16")]; + tensor var_3932_cast_fp16 = pow(x = 
mean_squared_171_cast_fp16, y = var_27_to_fp16)[name = string("op_3932_cast_fp16")]; + tensor normed_output_317_cast_fp16 = mul(x = hidden_states_707_cast_fp16, y = var_3932_cast_fp16)[name = string("normed_output_317_cast_fp16")]; + tensor const_297_to_fp16 = const()[name = string("const_297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263944704)))]; + tensor normed_output_319_cast_fp16 = mul(x = normed_output_317_cast_fp16, y = const_297_to_fp16)[name = string("normed_output_319_cast_fp16")]; + tensor var_3952 = const()[name = string("op_3952"), val = tensor([32, 32])]; + int32 var_3953_axis_0 = const()[name = string("op_3953_axis_0"), val = int32(-1)]; + tensor var_3953_cast_fp16_0, tensor var_3953_cast_fp16_1 = split(axis = var_3953_axis_0, split_sizes = var_3952, x = normed_output_319_cast_fp16)[name = string("op_3953_cast_fp16")]; + tensor var_3956 = const()[name = string("op_3956"), val = tensor([32, 32])]; + int32 var_3957_axis_0 = const()[name = string("op_3957_axis_0"), val = int32(-1)]; + tensor var_3957_0, tensor var_3957_1 = split(axis = var_3957_axis_0, split_sizes = var_3956, x = var_160_cast_fp16)[name = string("op_3957")]; + tensor var_3960 = const()[name = string("op_3960"), val = tensor([32, 32])]; + int32 var_3961_axis_0 = const()[name = string("op_3961_axis_0"), val = int32(-1)]; + tensor var_3961_0, tensor var_3961_1 = split(axis = var_3961_axis_0, split_sizes = var_3960, x = var_163_cast_fp16)[name = string("op_3961")]; + tensor cos_197_axes_0 = const()[name = string("cos_197_axes_0"), val = tensor([2])]; + tensor cos_197 = expand_dims(axes = cos_197_axes_0, x = var_3957_0)[name = string("cos_197")]; + tensor sin_197_axes_0 = const()[name = string("sin_197_axes_0"), val = tensor([2])]; + tensor sin_197 = expand_dims(axes = sin_197_axes_0, x = var_3961_0)[name = string("sin_197")]; + tensor var_3966_cast_fp16 = mul(x = var_3953_cast_fp16_0, y = cos_197)[name = string("op_3966_cast_fp16")]; + 
tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = var_3953_cast_fp16_0)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = var_3953_cast_fp16_0)[name = string("x2_97_cast_fp16")]; + fp16 const_302_promoted_to_fp16 = const()[name = string("const_302_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3977_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_302_promoted_to_fp16)[name = string("op_3977_cast_fp16")]; + bool var_3979_interleave_0 = const()[name = string("op_3979_interleave_0"), val = bool(false)]; + tensor var_3979_cast_fp16 = concat(axis = var_38, interleave = var_3979_interleave_0, values = (var_3977_cast_fp16, x1_97_cast_fp16))[name = string("op_3979_cast_fp16")]; + tensor var_3980_cast_fp16 = mul(x = var_3979_cast_fp16, y = sin_197)[name = string("op_3980_cast_fp16")]; + tensor var_3981_cast_fp16 = add(x = var_3966_cast_fp16, y = var_3980_cast_fp16)[name = string("op_3981_cast_fp16")]; + tensor cos_201_axes_0 = const()[name = string("cos_201_axes_0"), val = tensor([2])]; + tensor cos_201 = expand_dims(axes = cos_201_axes_0, x = var_3957_1)[name = string("cos_201")]; + tensor sin_201_axes_0 = const()[name = string("sin_201_axes_0"), val = tensor([2])]; + tensor sin_201 = expand_dims(axes = 
sin_201_axes_0, x = var_3961_1)[name = string("sin_201")]; + tensor var_3984_cast_fp16 = mul(x = var_3953_cast_fp16_1, y = cos_201)[name = string("op_3984_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = var_3953_cast_fp16_1)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = var_3953_cast_fp16_1)[name = string("x2_99_cast_fp16")]; + fp16 const_305_promoted_to_fp16 = const()[name = string("const_305_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3995_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_305_promoted_to_fp16)[name = string("op_3995_cast_fp16")]; + bool var_3997_interleave_0 = const()[name = string("op_3997_interleave_0"), val = bool(false)]; + tensor var_3997_cast_fp16 = concat(axis = var_38, interleave = var_3997_interleave_0, values = (var_3995_cast_fp16, x1_99_cast_fp16))[name = string("op_3997_cast_fp16")]; + tensor var_3998_cast_fp16 = mul(x = var_3997_cast_fp16, y = sin_201)[name = string("op_3998_cast_fp16")]; + tensor var_3999_cast_fp16 = add(x = var_3984_cast_fp16, y = var_3998_cast_fp16)[name = string("op_3999_cast_fp16")]; + bool query_states_25_interleave_0 = const()[name = string("query_states_25_interleave_0"), val = bool(false)]; + tensor 
query_states_25_cast_fp16 = concat(axis = var_38, interleave = query_states_25_interleave_0, values = (var_3981_cast_fp16, var_3999_cast_fp16))[name = string("query_states_25_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263944896)))]; + tensor linear_86_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_12_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_168_cast_fp16)[name = string("linear_86_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.2p+4)]; + fp16 model_vision_tower_encoder_layers_12_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.1ep+4)]; + tensor clip_171_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_86_cast_fp16)[name = string("clip_171_cast_fp16")]; + tensor var_4012 = const()[name = string("op_4012"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_713_cast_fp16 = reshape(shape = var_4012, x = clip_171_cast_fp16)[name = string("hidden_states_713_cast_fp16")]; + fp16 var_33_promoted_86_to_fp16 = const()[name = string("op_33_promoted_86_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4016_cast_fp16 = pow(x = hidden_states_713_cast_fp16, y = var_33_promoted_86_to_fp16)[name = string("op_4016_cast_fp16")]; + tensor var_4018_axes_0 = const()[name = string("op_4018_axes_0"), val = 
tensor([-1])]; + bool var_4018_keep_dims_0 = const()[name = string("op_4018_keep_dims_0"), val = bool(true)]; + tensor var_4018_cast_fp16 = reduce_mean(axes = var_4018_axes_0, keep_dims = var_4018_keep_dims_0, x = var_4016_cast_fp16)[name = string("op_4018_cast_fp16")]; + fp16 var_4019_to_fp16 = const()[name = string("op_4019_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_173_cast_fp16 = add(x = var_4018_cast_fp16, y = var_4019_to_fp16)[name = string("mean_squared_173_cast_fp16")]; + tensor var_4021_cast_fp16 = pow(x = mean_squared_173_cast_fp16, y = var_27_to_fp16)[name = string("op_4021_cast_fp16")]; + tensor normed_output_321_cast_fp16 = mul(x = hidden_states_713_cast_fp16, y = var_4021_cast_fp16)[name = string("normed_output_321_cast_fp16")]; + tensor const_306_to_fp16 = const()[name = string("const_306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265124608)))]; + tensor normed_output_323_cast_fp16 = mul(x = normed_output_321_cast_fp16, y = const_306_to_fp16)[name = string("normed_output_323_cast_fp16")]; + tensor var_4041 = const()[name = string("op_4041"), val = tensor([32, 32])]; + int32 var_4042_axis_0 = const()[name = string("op_4042_axis_0"), val = int32(-1)]; + tensor var_4042_cast_fp16_0, tensor var_4042_cast_fp16_1 = split(axis = var_4042_axis_0, split_sizes = var_4041, x = normed_output_323_cast_fp16)[name = string("op_4042_cast_fp16")]; + tensor var_4045 = const()[name = string("op_4045"), val = tensor([32, 32])]; + int32 var_4046_axis_0 = const()[name = string("op_4046_axis_0"), val = int32(-1)]; + tensor var_4046_0, tensor var_4046_1 = split(axis = var_4046_axis_0, split_sizes = var_4045, x = var_160_cast_fp16)[name = string("op_4046")]; + tensor var_4049 = const()[name = string("op_4049"), val = tensor([32, 32])]; + int32 var_4050_axis_0 = const()[name = string("op_4050_axis_0"), val = int32(-1)]; + tensor var_4050_0, tensor var_4050_1 = split(axis = var_4050_axis_0, split_sizes = 
var_4049, x = var_163_cast_fp16)[name = string("op_4050")]; + tensor cos_205_axes_0 = const()[name = string("cos_205_axes_0"), val = tensor([2])]; + tensor cos_205 = expand_dims(axes = cos_205_axes_0, x = var_4046_0)[name = string("cos_205")]; + tensor sin_205_axes_0 = const()[name = string("sin_205_axes_0"), val = tensor([2])]; + tensor sin_205 = expand_dims(axes = sin_205_axes_0, x = var_4050_0)[name = string("sin_205")]; + tensor var_4055_cast_fp16 = mul(x = var_4042_cast_fp16_0, y = cos_205)[name = string("op_4055_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = var_4042_cast_fp16_0)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = var_4042_cast_fp16_0)[name = string("x2_101_cast_fp16")]; + fp16 const_311_promoted_to_fp16 = const()[name = string("const_311_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4066_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_311_promoted_to_fp16)[name = string("op_4066_cast_fp16")]; + bool var_4068_interleave_0 = const()[name = string("op_4068_interleave_0"), val = bool(false)]; + tensor var_4068_cast_fp16 = concat(axis = var_38, interleave = var_4068_interleave_0, values = (var_4066_cast_fp16, x1_101_cast_fp16))[name = 
string("op_4068_cast_fp16")]; + tensor var_4069_cast_fp16 = mul(x = var_4068_cast_fp16, y = sin_205)[name = string("op_4069_cast_fp16")]; + tensor var_4070_cast_fp16 = add(x = var_4055_cast_fp16, y = var_4069_cast_fp16)[name = string("op_4070_cast_fp16")]; + tensor cos_209_axes_0 = const()[name = string("cos_209_axes_0"), val = tensor([2])]; + tensor cos_209 = expand_dims(axes = cos_209_axes_0, x = var_4046_1)[name = string("cos_209")]; + tensor sin_209_axes_0 = const()[name = string("sin_209_axes_0"), val = tensor([2])]; + tensor sin_209 = expand_dims(axes = sin_209_axes_0, x = var_4050_1)[name = string("sin_209")]; + tensor var_4073_cast_fp16 = mul(x = var_4042_cast_fp16_1, y = cos_209)[name = string("op_4073_cast_fp16")]; + tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = var_4042_cast_fp16_1)[name = string("x1_103_cast_fp16")]; + tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = var_4042_cast_fp16_1)[name = string("x2_103_cast_fp16")]; + fp16 const_314_promoted_to_fp16 = const()[name = string("const_314_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4084_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_314_promoted_to_fp16)[name = string("op_4084_cast_fp16")]; + bool var_4086_interleave_0 = const()[name = 
string("op_4086_interleave_0"), val = bool(false)]; + tensor var_4086_cast_fp16 = concat(axis = var_38, interleave = var_4086_interleave_0, values = (var_4084_cast_fp16, x1_103_cast_fp16))[name = string("op_4086_cast_fp16")]; + tensor var_4087_cast_fp16 = mul(x = var_4086_cast_fp16, y = sin_209)[name = string("op_4087_cast_fp16")]; + tensor var_4088_cast_fp16 = add(x = var_4073_cast_fp16, y = var_4087_cast_fp16)[name = string("op_4088_cast_fp16")]; + bool key_states_25_interleave_0 = const()[name = string("key_states_25_interleave_0"), val = bool(false)]; + tensor key_states_25_cast_fp16 = concat(axis = var_38, interleave = key_states_25_interleave_0, values = (var_4070_cast_fp16, var_4088_cast_fp16))[name = string("key_states_25_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265124800)))]; + tensor linear_87_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_12_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_168_cast_fp16)[name = string("linear_87_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.2p+4)]; + fp16 model_vision_tower_encoder_layers_12_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.1ep+4)]; + tensor clip_173_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_87_cast_fp16)[name = 
string("clip_173_cast_fp16")]; + tensor var_4101 = const()[name = string("op_4101"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_719_cast_fp16 = reshape(shape = var_4101, x = clip_173_cast_fp16)[name = string("hidden_states_719_cast_fp16")]; + fp16 var_33_promoted_87_to_fp16 = const()[name = string("op_33_promoted_87_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4104_cast_fp16 = pow(x = hidden_states_719_cast_fp16, y = var_33_promoted_87_to_fp16)[name = string("op_4104_cast_fp16")]; + tensor var_4106_axes_0 = const()[name = string("op_4106_axes_0"), val = tensor([-1])]; + bool var_4106_keep_dims_0 = const()[name = string("op_4106_keep_dims_0"), val = bool(true)]; + tensor var_4106_cast_fp16 = reduce_mean(axes = var_4106_axes_0, keep_dims = var_4106_keep_dims_0, x = var_4104_cast_fp16)[name = string("op_4106_cast_fp16")]; + fp16 var_4107_to_fp16 = const()[name = string("op_4107_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_175_cast_fp16 = add(x = var_4106_cast_fp16, y = var_4107_to_fp16)[name = string("mean_squared_175_cast_fp16")]; + tensor var_4109_cast_fp16 = pow(x = mean_squared_175_cast_fp16, y = var_27_to_fp16)[name = string("op_4109_cast_fp16")]; + tensor normed_output_325_cast_fp16 = mul(x = hidden_states_719_cast_fp16, y = var_4109_cast_fp16)[name = string("normed_output_325_cast_fp16")]; + tensor hidden_states_725_perm_0 = const()[name = string("hidden_states_725_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(true)]; + bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; + tensor transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_89 = transpose(perm = transpose_89_perm_0, x = key_states_25_cast_fp16)[name = 
string("transpose_109")]; + tensor transpose_88 = transpose(perm = transpose_88_perm_0, x = query_states_25_cast_fp16)[name = string("transpose_110")]; + tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("matmul_12_cast_fp16")]; + tensor add_12_cast_fp16 = add(x = matmul_12_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_12_cast_fp16")]; + int32 softmax_12_axis_0 = const()[name = string("softmax_12_axis_0"), val = int32(-1)]; + tensor softmax_12_cast_fp16 = softmax(axis = softmax_12_axis_0, x = add_12_cast_fp16)[name = string("softmax_12_cast_fp16")]; + bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; + bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; + tensor hidden_states_725_cast_fp16 = transpose(perm = hidden_states_725_perm_0, x = normed_output_325_cast_fp16)[name = string("transpose_111")]; + tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = softmax_12_cast_fp16, y = hidden_states_725_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_4114_perm_0 = const()[name = string("op_4114_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4116 = const()[name = string("op_4116"), val = tensor([1, 2304, -1])]; + tensor var_4114_cast_fp16 = transpose(perm = var_4114_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_108")]; + tensor var_4117_cast_fp16 = reshape(shape = var_4116, x = var_4114_cast_fp16)[name = string("op_4117_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.eep+0)]; + fp16 
model_vision_tower_encoder_layers_12_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.ecp+0)]; + tensor clip_174_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_self_attn_o_proj_input_max_promoted_to_fp16, x = var_4117_cast_fp16)[name = string("clip_174_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266304512)))]; + tensor linear_88_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_12_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_174_cast_fp16)[name = string("linear_88_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3cp+1)]; + fp16 model_vision_tower_encoder_layers_12_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.3ap+1)]; + tensor clip_175_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_88_cast_fp16)[name = string("clip_175_cast_fp16")]; + fp16 var_33_promoted_88_to_fp16 = const()[name = string("op_33_promoted_88_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4130_cast_fp16 = pow(x = clip_175_cast_fp16, y = var_33_promoted_88_to_fp16)[name = string("op_4130_cast_fp16")]; + tensor 
var_4132_axes_0 = const()[name = string("op_4132_axes_0"), val = tensor([-1])]; + bool var_4132_keep_dims_0 = const()[name = string("op_4132_keep_dims_0"), val = bool(true)]; + tensor var_4132_cast_fp16 = reduce_mean(axes = var_4132_axes_0, keep_dims = var_4132_keep_dims_0, x = var_4130_cast_fp16)[name = string("op_4132_cast_fp16")]; + fp16 var_4133_to_fp16 = const()[name = string("op_4133_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_177_cast_fp16 = add(x = var_4132_cast_fp16, y = var_4133_to_fp16)[name = string("mean_squared_177_cast_fp16")]; + tensor var_4135_cast_fp16 = pow(x = mean_squared_177_cast_fp16, y = var_27_to_fp16)[name = string("op_4135_cast_fp16")]; + tensor normed_output_327_cast_fp16 = mul(x = clip_175_cast_fp16, y = var_4135_cast_fp16)[name = string("normed_output_327_cast_fp16")]; + tensor const_315_to_fp16 = const()[name = string("const_315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267484224)))]; + tensor normed_output_329_cast_fp16 = mul(x = normed_output_327_cast_fp16, y = const_315_to_fp16)[name = string("normed_output_329_cast_fp16")]; + tensor hidden_states_737_cast_fp16 = add(x = hidden_states_699_cast_fp16, y = normed_output_329_cast_fp16)[name = string("hidden_states_737_cast_fp16")]; + fp16 var_33_promoted_89_to_fp16 = const()[name = string("op_33_promoted_89_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4143_cast_fp16 = pow(x = hidden_states_737_cast_fp16, y = var_33_promoted_89_to_fp16)[name = string("op_4143_cast_fp16")]; + tensor var_4145_axes_0 = const()[name = string("op_4145_axes_0"), val = tensor([-1])]; + bool var_4145_keep_dims_0 = const()[name = string("op_4145_keep_dims_0"), val = bool(true)]; + tensor var_4145_cast_fp16 = reduce_mean(axes = var_4145_axes_0, keep_dims = var_4145_keep_dims_0, x = var_4143_cast_fp16)[name = string("op_4145_cast_fp16")]; + fp16 var_4146_to_fp16 = const()[name = string("op_4146_to_fp16"), val = fp16(0x1.1p-20)]; + tensor 
mean_squared_179_cast_fp16 = add(x = var_4145_cast_fp16, y = var_4146_to_fp16)[name = string("mean_squared_179_cast_fp16")]; + tensor var_4148_cast_fp16 = pow(x = mean_squared_179_cast_fp16, y = var_27_to_fp16)[name = string("op_4148_cast_fp16")]; + tensor normed_output_331_cast_fp16 = mul(x = hidden_states_737_cast_fp16, y = var_4148_cast_fp16)[name = string("normed_output_331_cast_fp16")]; + tensor const_316_to_fp16 = const()[name = string("const_316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267485824)))]; + tensor normed_output_333_cast_fp16 = mul(x = normed_output_331_cast_fp16, y = const_316_to_fp16)[name = string("normed_output_333_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.cp+2)]; + fp16 model_vision_tower_encoder_layers_12_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.bcp+2)]; + tensor clip_176_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_333_cast_fp16)[name = string("clip_176_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267487424)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_12_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_176_cast_fp16)[name = string("linear_89_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_12_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.cap+2)]; + fp16 model_vision_tower_encoder_layers_12_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.c6p+2)]; + tensor clip_177_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_89_cast_fp16)[name = string("clip_177_cast_fp16")]; + string var_4165_mode_0 = const()[name = string("op_4165_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4165_cast_fp16 = gelu(mode = var_4165_mode_0, x = clip_177_cast_fp16)[name = string("op_4165_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272206080)))]; + tensor linear_90_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_12_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_176_cast_fp16)[name = string("linear_90_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.cap+2)]; + fp16 model_vision_tower_encoder_layers_12_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.c6p+2)]; + tensor clip_179_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_mlp_up_proj_output_min_promoted_to_fp16, 
beta = model_vision_tower_encoder_layers_12_mlp_up_proj_output_max_promoted_to_fp16, x = linear_90_cast_fp16)[name = string("clip_179_cast_fp16")]; + tensor hidden_states_747_cast_fp16 = mul(x = var_4165_cast_fp16, y = clip_179_cast_fp16)[name = string("hidden_states_747_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.a4p+3)]; + fp16 model_vision_tower_encoder_layers_12_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.ap+3)]; + tensor clip_180_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_12_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_747_cast_fp16)[name = string("clip_180_cast_fp16")]; + tensor model_vision_tower_encoder_layers_12_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276924736)))]; + tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_12_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_180_cast_fp16)[name = string("linear_91_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_12_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.1cp+2)]; + fp16 model_vision_tower_encoder_layers_12_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_12_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.1ap+2)]; + tensor clip_181_cast_fp16 = clip(alpha = 
model_vision_tower_encoder_layers_12_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_12_mlp_down_proj_output_max_promoted_to_fp16, x = linear_91_cast_fp16)[name = string("clip_181_cast_fp16")]; + fp16 var_33_promoted_90_to_fp16 = const()[name = string("op_33_promoted_90_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4187_cast_fp16 = pow(x = clip_181_cast_fp16, y = var_33_promoted_90_to_fp16)[name = string("op_4187_cast_fp16")]; + tensor var_4189_axes_0 = const()[name = string("op_4189_axes_0"), val = tensor([-1])]; + bool var_4189_keep_dims_0 = const()[name = string("op_4189_keep_dims_0"), val = bool(true)]; + tensor var_4189_cast_fp16 = reduce_mean(axes = var_4189_axes_0, keep_dims = var_4189_keep_dims_0, x = var_4187_cast_fp16)[name = string("op_4189_cast_fp16")]; + fp16 var_4190_to_fp16 = const()[name = string("op_4190_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_181_cast_fp16 = add(x = var_4189_cast_fp16, y = var_4190_to_fp16)[name = string("mean_squared_181_cast_fp16")]; + tensor var_4192_cast_fp16 = pow(x = mean_squared_181_cast_fp16, y = var_27_to_fp16)[name = string("op_4192_cast_fp16")]; + tensor normed_output_335_cast_fp16 = mul(x = clip_181_cast_fp16, y = var_4192_cast_fp16)[name = string("normed_output_335_cast_fp16")]; + tensor const_317_to_fp16 = const()[name = string("const_317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281643392)))]; + tensor normed_output_337_cast_fp16 = mul(x = normed_output_335_cast_fp16, y = const_317_to_fp16)[name = string("normed_output_337_cast_fp16")]; + tensor hidden_states_757_cast_fp16 = add(x = hidden_states_737_cast_fp16, y = normed_output_337_cast_fp16)[name = string("hidden_states_757_cast_fp16")]; + fp16 var_33_promoted_91_to_fp16 = const()[name = string("op_33_promoted_91_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4206_cast_fp16 = pow(x = hidden_states_757_cast_fp16, y = var_33_promoted_91_to_fp16)[name = 
string("op_4206_cast_fp16")]; + tensor var_4208_axes_0 = const()[name = string("op_4208_axes_0"), val = tensor([-1])]; + bool var_4208_keep_dims_0 = const()[name = string("op_4208_keep_dims_0"), val = bool(true)]; + tensor var_4208_cast_fp16 = reduce_mean(axes = var_4208_axes_0, keep_dims = var_4208_keep_dims_0, x = var_4206_cast_fp16)[name = string("op_4208_cast_fp16")]; + fp16 var_4209_to_fp16 = const()[name = string("op_4209_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_183_cast_fp16 = add(x = var_4208_cast_fp16, y = var_4209_to_fp16)[name = string("mean_squared_183_cast_fp16")]; + tensor var_4211_cast_fp16 = pow(x = mean_squared_183_cast_fp16, y = var_27_to_fp16)[name = string("op_4211_cast_fp16")]; + tensor normed_output_339_cast_fp16 = mul(x = hidden_states_757_cast_fp16, y = var_4211_cast_fp16)[name = string("normed_output_339_cast_fp16")]; + tensor const_318_to_fp16 = const()[name = string("const_318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281644992)))]; + tensor normed_output_341_cast_fp16 = mul(x = normed_output_339_cast_fp16, y = const_318_to_fp16)[name = string("normed_output_341_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.12p+4)]; + fp16 model_vision_tower_encoder_layers_13_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.0ep+4)]; + tensor clip_182_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_341_cast_fp16)[name = string("clip_182_cast_fp16")]; + tensor 
model_vision_tower_encoder_layers_13_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281646592)))]; + tensor linear_92_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_13_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_182_cast_fp16)[name = string("linear_92_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.18p+4)]; + fp16 model_vision_tower_encoder_layers_13_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.16p+4)]; + tensor clip_183_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_92_cast_fp16)[name = string("clip_183_cast_fp16")]; + tensor var_4233 = const()[name = string("op_4233"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_765_cast_fp16 = reshape(shape = var_4233, x = clip_183_cast_fp16)[name = string("hidden_states_765_cast_fp16")]; + fp16 var_33_promoted_92_to_fp16 = const()[name = string("op_33_promoted_92_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4237_cast_fp16 = pow(x = hidden_states_765_cast_fp16, y = var_33_promoted_92_to_fp16)[name = string("op_4237_cast_fp16")]; + tensor var_4239_axes_0 = const()[name = string("op_4239_axes_0"), val = tensor([-1])]; + bool var_4239_keep_dims_0 = const()[name = string("op_4239_keep_dims_0"), val = bool(true)]; + tensor var_4239_cast_fp16 = reduce_mean(axes = var_4239_axes_0, keep_dims = var_4239_keep_dims_0, 
x = var_4237_cast_fp16)[name = string("op_4239_cast_fp16")]; + fp16 var_4240_to_fp16 = const()[name = string("op_4240_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_185_cast_fp16 = add(x = var_4239_cast_fp16, y = var_4240_to_fp16)[name = string("mean_squared_185_cast_fp16")]; + tensor var_4242_cast_fp16 = pow(x = mean_squared_185_cast_fp16, y = var_27_to_fp16)[name = string("op_4242_cast_fp16")]; + tensor normed_output_343_cast_fp16 = mul(x = hidden_states_765_cast_fp16, y = var_4242_cast_fp16)[name = string("normed_output_343_cast_fp16")]; + tensor const_321_to_fp16 = const()[name = string("const_321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282826304)))]; + tensor normed_output_345_cast_fp16 = mul(x = normed_output_343_cast_fp16, y = const_321_to_fp16)[name = string("normed_output_345_cast_fp16")]; + tensor var_4262 = const()[name = string("op_4262"), val = tensor([32, 32])]; + int32 var_4263_axis_0 = const()[name = string("op_4263_axis_0"), val = int32(-1)]; + tensor var_4263_cast_fp16_0, tensor var_4263_cast_fp16_1 = split(axis = var_4263_axis_0, split_sizes = var_4262, x = normed_output_345_cast_fp16)[name = string("op_4263_cast_fp16")]; + tensor var_4266 = const()[name = string("op_4266"), val = tensor([32, 32])]; + int32 var_4267_axis_0 = const()[name = string("op_4267_axis_0"), val = int32(-1)]; + tensor var_4267_0, tensor var_4267_1 = split(axis = var_4267_axis_0, split_sizes = var_4266, x = var_160_cast_fp16)[name = string("op_4267")]; + tensor var_4270 = const()[name = string("op_4270"), val = tensor([32, 32])]; + int32 var_4271_axis_0 = const()[name = string("op_4271_axis_0"), val = int32(-1)]; + tensor var_4271_0, tensor var_4271_1 = split(axis = var_4271_axis_0, split_sizes = var_4270, x = var_163_cast_fp16)[name = string("op_4271")]; + tensor cos_213_axes_0 = const()[name = string("cos_213_axes_0"), val = tensor([2])]; + tensor cos_213 = expand_dims(axes = cos_213_axes_0, x = 
var_4267_0)[name = string("cos_213")]; + tensor sin_213_axes_0 = const()[name = string("sin_213_axes_0"), val = tensor([2])]; + tensor sin_213 = expand_dims(axes = sin_213_axes_0, x = var_4271_0)[name = string("sin_213")]; + tensor var_4276_cast_fp16 = mul(x = var_4263_cast_fp16_0, y = cos_213)[name = string("op_4276_cast_fp16")]; + tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = var_4263_cast_fp16_0)[name = string("x1_105_cast_fp16")]; + tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = var_4263_cast_fp16_0)[name = string("x2_105_cast_fp16")]; + fp16 const_326_promoted_to_fp16 = const()[name = string("const_326_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4287_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_326_promoted_to_fp16)[name = string("op_4287_cast_fp16")]; + bool var_4289_interleave_0 = const()[name = string("op_4289_interleave_0"), val = bool(false)]; + tensor var_4289_cast_fp16 = concat(axis = var_38, interleave = var_4289_interleave_0, values = (var_4287_cast_fp16, x1_105_cast_fp16))[name = string("op_4289_cast_fp16")]; + tensor var_4290_cast_fp16 = mul(x = var_4289_cast_fp16, y = sin_213)[name = string("op_4290_cast_fp16")]; + tensor var_4291_cast_fp16 = add(x = var_4276_cast_fp16, y = 
var_4290_cast_fp16)[name = string("op_4291_cast_fp16")]; + tensor cos_217_axes_0 = const()[name = string("cos_217_axes_0"), val = tensor([2])]; + tensor cos_217 = expand_dims(axes = cos_217_axes_0, x = var_4267_1)[name = string("cos_217")]; + tensor sin_217_axes_0 = const()[name = string("sin_217_axes_0"), val = tensor([2])]; + tensor sin_217 = expand_dims(axes = sin_217_axes_0, x = var_4271_1)[name = string("sin_217")]; + tensor var_4294_cast_fp16 = mul(x = var_4263_cast_fp16_1, y = cos_217)[name = string("op_4294_cast_fp16")]; + tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = var_4263_cast_fp16_1)[name = string("x1_107_cast_fp16")]; + tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = var_4263_cast_fp16_1)[name = string("x2_107_cast_fp16")]; + fp16 const_329_promoted_to_fp16 = const()[name = string("const_329_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4305_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_329_promoted_to_fp16)[name = string("op_4305_cast_fp16")]; + bool var_4307_interleave_0 = const()[name = string("op_4307_interleave_0"), val = bool(false)]; + tensor var_4307_cast_fp16 = concat(axis = var_38, interleave = var_4307_interleave_0, values = (var_4305_cast_fp16, x1_107_cast_fp16))[name = 
string("op_4307_cast_fp16")]; + tensor var_4308_cast_fp16 = mul(x = var_4307_cast_fp16, y = sin_217)[name = string("op_4308_cast_fp16")]; + tensor var_4309_cast_fp16 = add(x = var_4294_cast_fp16, y = var_4308_cast_fp16)[name = string("op_4309_cast_fp16")]; + bool query_states_27_interleave_0 = const()[name = string("query_states_27_interleave_0"), val = bool(false)]; + tensor query_states_27_cast_fp16 = concat(axis = var_38, interleave = query_states_27_interleave_0, values = (var_4291_cast_fp16, var_4309_cast_fp16))[name = string("query_states_27_cast_fp16")]; + tensor model_vision_tower_encoder_layers_13_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282826496)))]; + tensor linear_93_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_13_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_182_cast_fp16)[name = string("linear_93_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.52p+4)]; + fp16 model_vision_tower_encoder_layers_13_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5p+4)]; + tensor clip_185_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_93_cast_fp16)[name = string("clip_185_cast_fp16")]; + tensor var_4322 = const()[name = string("op_4322"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_771_cast_fp16 = reshape(shape = var_4322, x = 
clip_185_cast_fp16)[name = string("hidden_states_771_cast_fp16")]; + fp16 var_33_promoted_93_to_fp16 = const()[name = string("op_33_promoted_93_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4326_cast_fp16 = pow(x = hidden_states_771_cast_fp16, y = var_33_promoted_93_to_fp16)[name = string("op_4326_cast_fp16")]; + tensor var_4328_axes_0 = const()[name = string("op_4328_axes_0"), val = tensor([-1])]; + bool var_4328_keep_dims_0 = const()[name = string("op_4328_keep_dims_0"), val = bool(true)]; + tensor var_4328_cast_fp16 = reduce_mean(axes = var_4328_axes_0, keep_dims = var_4328_keep_dims_0, x = var_4326_cast_fp16)[name = string("op_4328_cast_fp16")]; + fp16 var_4329_to_fp16 = const()[name = string("op_4329_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_187_cast_fp16 = add(x = var_4328_cast_fp16, y = var_4329_to_fp16)[name = string("mean_squared_187_cast_fp16")]; + tensor var_4331_cast_fp16 = pow(x = mean_squared_187_cast_fp16, y = var_27_to_fp16)[name = string("op_4331_cast_fp16")]; + tensor normed_output_347_cast_fp16 = mul(x = hidden_states_771_cast_fp16, y = var_4331_cast_fp16)[name = string("normed_output_347_cast_fp16")]; + tensor const_330_to_fp16 = const()[name = string("const_330_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284006208)))]; + tensor normed_output_349_cast_fp16 = mul(x = normed_output_347_cast_fp16, y = const_330_to_fp16)[name = string("normed_output_349_cast_fp16")]; + tensor var_4351 = const()[name = string("op_4351"), val = tensor([32, 32])]; + int32 var_4352_axis_0 = const()[name = string("op_4352_axis_0"), val = int32(-1)]; + tensor var_4352_cast_fp16_0, tensor var_4352_cast_fp16_1 = split(axis = var_4352_axis_0, split_sizes = var_4351, x = normed_output_349_cast_fp16)[name = string("op_4352_cast_fp16")]; + tensor var_4355 = const()[name = string("op_4355"), val = tensor([32, 32])]; + int32 var_4356_axis_0 = const()[name = string("op_4356_axis_0"), val = int32(-1)]; + tensor 
var_4356_0, tensor var_4356_1 = split(axis = var_4356_axis_0, split_sizes = var_4355, x = var_160_cast_fp16)[name = string("op_4356")]; + tensor var_4359 = const()[name = string("op_4359"), val = tensor([32, 32])]; + int32 var_4360_axis_0 = const()[name = string("op_4360_axis_0"), val = int32(-1)]; + tensor var_4360_0, tensor var_4360_1 = split(axis = var_4360_axis_0, split_sizes = var_4359, x = var_163_cast_fp16)[name = string("op_4360")]; + tensor cos_221_axes_0 = const()[name = string("cos_221_axes_0"), val = tensor([2])]; + tensor cos_221 = expand_dims(axes = cos_221_axes_0, x = var_4356_0)[name = string("cos_221")]; + tensor sin_221_axes_0 = const()[name = string("sin_221_axes_0"), val = tensor([2])]; + tensor sin_221 = expand_dims(axes = sin_221_axes_0, x = var_4360_0)[name = string("sin_221")]; + tensor var_4365_cast_fp16 = mul(x = var_4352_cast_fp16_0, y = cos_221)[name = string("op_4365_cast_fp16")]; + tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = var_4352_cast_fp16_0)[name = string("x1_109_cast_fp16")]; + tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = var_4352_cast_fp16_0)[name = string("x2_109_cast_fp16")]; + fp16 const_335_promoted_to_fp16 = const()[name = string("const_335_promoted_to_fp16"), val = 
fp16(-0x1p+0)]; + tensor var_4376_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_335_promoted_to_fp16)[name = string("op_4376_cast_fp16")]; + bool var_4378_interleave_0 = const()[name = string("op_4378_interleave_0"), val = bool(false)]; + tensor var_4378_cast_fp16 = concat(axis = var_38, interleave = var_4378_interleave_0, values = (var_4376_cast_fp16, x1_109_cast_fp16))[name = string("op_4378_cast_fp16")]; + tensor var_4379_cast_fp16 = mul(x = var_4378_cast_fp16, y = sin_221)[name = string("op_4379_cast_fp16")]; + tensor var_4380_cast_fp16 = add(x = var_4365_cast_fp16, y = var_4379_cast_fp16)[name = string("op_4380_cast_fp16")]; + tensor cos_225_axes_0 = const()[name = string("cos_225_axes_0"), val = tensor([2])]; + tensor cos_225 = expand_dims(axes = cos_225_axes_0, x = var_4356_1)[name = string("cos_225")]; + tensor sin_225_axes_0 = const()[name = string("sin_225_axes_0"), val = tensor([2])]; + tensor sin_225 = expand_dims(axes = sin_225_axes_0, x = var_4360_1)[name = string("sin_225")]; + tensor var_4383_cast_fp16 = mul(x = var_4352_cast_fp16_1, y = cos_225)[name = string("op_4383_cast_fp16")]; + tensor x1_111_begin_0 = const()[name = string("x1_111_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_111_end_0 = const()[name = string("x1_111_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_111_end_mask_0 = const()[name = string("x1_111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_111_cast_fp16 = slice_by_index(begin = x1_111_begin_0, end = x1_111_end_0, end_mask = x1_111_end_mask_0, x = var_4352_cast_fp16_1)[name = string("x1_111_cast_fp16")]; + tensor x2_111_begin_0 = const()[name = string("x2_111_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_111_end_0 = const()[name = string("x2_111_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_111_end_mask_0 = const()[name = string("x2_111_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_111_cast_fp16 = slice_by_index(begin = x2_111_begin_0, end = x2_111_end_0, 
end_mask = x2_111_end_mask_0, x = var_4352_cast_fp16_1)[name = string("x2_111_cast_fp16")]; + fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4394_cast_fp16 = mul(x = x2_111_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_4394_cast_fp16")]; + bool var_4396_interleave_0 = const()[name = string("op_4396_interleave_0"), val = bool(false)]; + tensor var_4396_cast_fp16 = concat(axis = var_38, interleave = var_4396_interleave_0, values = (var_4394_cast_fp16, x1_111_cast_fp16))[name = string("op_4396_cast_fp16")]; + tensor var_4397_cast_fp16 = mul(x = var_4396_cast_fp16, y = sin_225)[name = string("op_4397_cast_fp16")]; + tensor var_4398_cast_fp16 = add(x = var_4383_cast_fp16, y = var_4397_cast_fp16)[name = string("op_4398_cast_fp16")]; + bool key_states_27_interleave_0 = const()[name = string("key_states_27_interleave_0"), val = bool(false)]; + tensor key_states_27_cast_fp16 = concat(axis = var_38, interleave = key_states_27_interleave_0, values = (var_4380_cast_fp16, var_4398_cast_fp16))[name = string("key_states_27_cast_fp16")]; + tensor model_vision_tower_encoder_layers_13_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284006400)))]; + tensor linear_94_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_13_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_182_cast_fp16)[name = string("linear_94_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.52p+4)]; + fp16 model_vision_tower_encoder_layers_13_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_13_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5p+4)]; + tensor clip_187_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_94_cast_fp16)[name = string("clip_187_cast_fp16")]; + tensor var_4411 = const()[name = string("op_4411"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_777_cast_fp16 = reshape(shape = var_4411, x = clip_187_cast_fp16)[name = string("hidden_states_777_cast_fp16")]; + fp16 var_33_promoted_94_to_fp16 = const()[name = string("op_33_promoted_94_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4414_cast_fp16 = pow(x = hidden_states_777_cast_fp16, y = var_33_promoted_94_to_fp16)[name = string("op_4414_cast_fp16")]; + tensor var_4416_axes_0 = const()[name = string("op_4416_axes_0"), val = tensor([-1])]; + bool var_4416_keep_dims_0 = const()[name = string("op_4416_keep_dims_0"), val = bool(true)]; + tensor var_4416_cast_fp16 = reduce_mean(axes = var_4416_axes_0, keep_dims = var_4416_keep_dims_0, x = var_4414_cast_fp16)[name = string("op_4416_cast_fp16")]; + fp16 var_4417_to_fp16 = const()[name = string("op_4417_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_189_cast_fp16 = add(x = var_4416_cast_fp16, y = var_4417_to_fp16)[name = string("mean_squared_189_cast_fp16")]; + tensor var_4419_cast_fp16 = pow(x = mean_squared_189_cast_fp16, y = var_27_to_fp16)[name = string("op_4419_cast_fp16")]; + tensor normed_output_351_cast_fp16 = mul(x = hidden_states_777_cast_fp16, y = var_4419_cast_fp16)[name = string("normed_output_351_cast_fp16")]; + tensor hidden_states_783_perm_0 = const()[name = string("hidden_states_783_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(true)]; + bool matmul_13_transpose_x_0 = const()[name = 
string("matmul_13_transpose_x_0"), val = bool(false)]; + tensor transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_91 = transpose(perm = transpose_91_perm_0, x = key_states_27_cast_fp16)[name = string("transpose_105")]; + tensor transpose_90 = transpose(perm = transpose_90_perm_0, x = query_states_27_cast_fp16)[name = string("transpose_106")]; + tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("matmul_13_cast_fp16")]; + tensor add_13_cast_fp16 = add(x = matmul_13_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_13_cast_fp16")]; + int32 softmax_13_axis_0 = const()[name = string("softmax_13_axis_0"), val = int32(-1)]; + tensor softmax_13_cast_fp16 = softmax(axis = softmax_13_axis_0, x = add_13_cast_fp16)[name = string("softmax_13_cast_fp16")]; + bool attn_output_53_transpose_x_0 = const()[name = string("attn_output_53_transpose_x_0"), val = bool(false)]; + bool attn_output_53_transpose_y_0 = const()[name = string("attn_output_53_transpose_y_0"), val = bool(false)]; + tensor hidden_states_783_cast_fp16 = transpose(perm = hidden_states_783_perm_0, x = normed_output_351_cast_fp16)[name = string("transpose_107")]; + tensor attn_output_53_cast_fp16 = matmul(transpose_x = attn_output_53_transpose_x_0, transpose_y = attn_output_53_transpose_y_0, x = softmax_13_cast_fp16, y = hidden_states_783_cast_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor var_4424_perm_0 = const()[name = string("op_4424_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4426 = const()[name = string("op_4426"), val = tensor([1, 2304, -1])]; + tensor var_4424_cast_fp16 = transpose(perm = var_4424_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_104")]; + tensor var_4427_cast_fp16 = 
reshape(shape = var_4426, x = var_4424_cast_fp16)[name = string("op_4427_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.f8p+0)]; + fp16 model_vision_tower_encoder_layers_13_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.f4p+0)]; + tensor clip_188_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_self_attn_o_proj_input_max_promoted_to_fp16, x = var_4427_cast_fp16)[name = string("clip_188_cast_fp16")]; + tensor model_vision_tower_encoder_layers_13_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285186112)))]; + tensor linear_95_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_13_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_188_cast_fp16)[name = string("linear_95_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6ep+1)]; + fp16 model_vision_tower_encoder_layers_13_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.6ap+1)]; + tensor clip_189_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_self_attn_o_proj_output_max_promoted_to_fp16, x 
= linear_95_cast_fp16)[name = string("clip_189_cast_fp16")]; + fp16 var_33_promoted_95_to_fp16 = const()[name = string("op_33_promoted_95_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4440_cast_fp16 = pow(x = clip_189_cast_fp16, y = var_33_promoted_95_to_fp16)[name = string("op_4440_cast_fp16")]; + tensor var_4442_axes_0 = const()[name = string("op_4442_axes_0"), val = tensor([-1])]; + bool var_4442_keep_dims_0 = const()[name = string("op_4442_keep_dims_0"), val = bool(true)]; + tensor var_4442_cast_fp16 = reduce_mean(axes = var_4442_axes_0, keep_dims = var_4442_keep_dims_0, x = var_4440_cast_fp16)[name = string("op_4442_cast_fp16")]; + fp16 var_4443_to_fp16 = const()[name = string("op_4443_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_191_cast_fp16 = add(x = var_4442_cast_fp16, y = var_4443_to_fp16)[name = string("mean_squared_191_cast_fp16")]; + tensor var_4445_cast_fp16 = pow(x = mean_squared_191_cast_fp16, y = var_27_to_fp16)[name = string("op_4445_cast_fp16")]; + tensor normed_output_353_cast_fp16 = mul(x = clip_189_cast_fp16, y = var_4445_cast_fp16)[name = string("normed_output_353_cast_fp16")]; + tensor const_339_to_fp16 = const()[name = string("const_339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286365824)))]; + tensor normed_output_355_cast_fp16 = mul(x = normed_output_353_cast_fp16, y = const_339_to_fp16)[name = string("normed_output_355_cast_fp16")]; + tensor hidden_states_795_cast_fp16 = add(x = hidden_states_757_cast_fp16, y = normed_output_355_cast_fp16)[name = string("hidden_states_795_cast_fp16")]; + fp16 var_33_promoted_96_to_fp16 = const()[name = string("op_33_promoted_96_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4453_cast_fp16 = pow(x = hidden_states_795_cast_fp16, y = var_33_promoted_96_to_fp16)[name = string("op_4453_cast_fp16")]; + tensor var_4455_axes_0 = const()[name = string("op_4455_axes_0"), val = tensor([-1])]; + bool var_4455_keep_dims_0 = const()[name = 
string("op_4455_keep_dims_0"), val = bool(true)]; + tensor var_4455_cast_fp16 = reduce_mean(axes = var_4455_axes_0, keep_dims = var_4455_keep_dims_0, x = var_4453_cast_fp16)[name = string("op_4455_cast_fp16")]; + fp16 var_4456_to_fp16 = const()[name = string("op_4456_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_193_cast_fp16 = add(x = var_4455_cast_fp16, y = var_4456_to_fp16)[name = string("mean_squared_193_cast_fp16")]; + tensor var_4458_cast_fp16 = pow(x = mean_squared_193_cast_fp16, y = var_27_to_fp16)[name = string("op_4458_cast_fp16")]; + tensor normed_output_357_cast_fp16 = mul(x = hidden_states_795_cast_fp16, y = var_4458_cast_fp16)[name = string("normed_output_357_cast_fp16")]; + tensor const_340_to_fp16 = const()[name = string("const_340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286367424)))]; + tensor normed_output_359_cast_fp16 = mul(x = normed_output_357_cast_fp16, y = const_340_to_fp16)[name = string("normed_output_359_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1p+3)]; + fp16 model_vision_tower_encoder_layers_13_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.fcp+2)]; + tensor clip_190_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_359_cast_fp16)[name = string("clip_190_cast_fp16")]; + tensor model_vision_tower_encoder_layers_13_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(286369024)))]; + tensor linear_96_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_13_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_190_cast_fp16)[name = string("linear_96_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.08p+3)]; + fp16 model_vision_tower_encoder_layers_13_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.06p+3)]; + tensor clip_191_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_96_cast_fp16)[name = string("clip_191_cast_fp16")]; + string var_4475_mode_0 = const()[name = string("op_4475_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4475_cast_fp16 = gelu(mode = var_4475_mode_0, x = clip_191_cast_fp16)[name = string("op_4475_cast_fp16")]; + tensor model_vision_tower_encoder_layers_13_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291087680)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_13_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_190_cast_fp16)[name = string("linear_97_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.08p+3)]; + fp16 
model_vision_tower_encoder_layers_13_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.06p+3)]; + tensor clip_193_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_mlp_up_proj_output_max_promoted_to_fp16, x = linear_97_cast_fp16)[name = string("clip_193_cast_fp16")]; + tensor hidden_states_805_cast_fp16 = mul(x = var_4475_cast_fp16, y = clip_193_cast_fp16)[name = string("hidden_states_805_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.8ap+3)]; + fp16 model_vision_tower_encoder_layers_13_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.86p+3)]; + tensor clip_194_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_805_cast_fp16)[name = string("clip_194_cast_fp16")]; + tensor model_vision_tower_encoder_layers_13_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295806336)))]; + tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_13_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_194_cast_fp16)[name = string("linear_98_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_13_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_13_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.f4p+1)]; + fp16 model_vision_tower_encoder_layers_13_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_13_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.fp+1)]; + tensor clip_195_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_13_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_13_mlp_down_proj_output_max_promoted_to_fp16, x = linear_98_cast_fp16)[name = string("clip_195_cast_fp16")]; + fp16 var_33_promoted_97_to_fp16 = const()[name = string("op_33_promoted_97_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4497_cast_fp16 = pow(x = clip_195_cast_fp16, y = var_33_promoted_97_to_fp16)[name = string("op_4497_cast_fp16")]; + tensor var_4499_axes_0 = const()[name = string("op_4499_axes_0"), val = tensor([-1])]; + bool var_4499_keep_dims_0 = const()[name = string("op_4499_keep_dims_0"), val = bool(true)]; + tensor var_4499_cast_fp16 = reduce_mean(axes = var_4499_axes_0, keep_dims = var_4499_keep_dims_0, x = var_4497_cast_fp16)[name = string("op_4499_cast_fp16")]; + fp16 var_4500_to_fp16 = const()[name = string("op_4500_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_195_cast_fp16 = add(x = var_4499_cast_fp16, y = var_4500_to_fp16)[name = string("mean_squared_195_cast_fp16")]; + tensor var_4502_cast_fp16 = pow(x = mean_squared_195_cast_fp16, y = var_27_to_fp16)[name = string("op_4502_cast_fp16")]; + tensor normed_output_361_cast_fp16 = mul(x = clip_195_cast_fp16, y = var_4502_cast_fp16)[name = string("normed_output_361_cast_fp16")]; + tensor const_341_to_fp16 = const()[name = string("const_341_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300524992)))]; + tensor normed_output_363_cast_fp16 = mul(x = normed_output_361_cast_fp16, y = const_341_to_fp16)[name = string("normed_output_363_cast_fp16")]; + 
tensor hidden_states_815_cast_fp16 = add(x = hidden_states_795_cast_fp16, y = normed_output_363_cast_fp16)[name = string("hidden_states_815_cast_fp16")]; + fp16 var_33_promoted_98_to_fp16 = const()[name = string("op_33_promoted_98_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4516_cast_fp16 = pow(x = hidden_states_815_cast_fp16, y = var_33_promoted_98_to_fp16)[name = string("op_4516_cast_fp16")]; + tensor var_4518_axes_0 = const()[name = string("op_4518_axes_0"), val = tensor([-1])]; + bool var_4518_keep_dims_0 = const()[name = string("op_4518_keep_dims_0"), val = bool(true)]; + tensor var_4518_cast_fp16 = reduce_mean(axes = var_4518_axes_0, keep_dims = var_4518_keep_dims_0, x = var_4516_cast_fp16)[name = string("op_4518_cast_fp16")]; + fp16 var_4519_to_fp16 = const()[name = string("op_4519_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_197_cast_fp16 = add(x = var_4518_cast_fp16, y = var_4519_to_fp16)[name = string("mean_squared_197_cast_fp16")]; + tensor var_4521_cast_fp16 = pow(x = mean_squared_197_cast_fp16, y = var_27_to_fp16)[name = string("op_4521_cast_fp16")]; + tensor normed_output_365_cast_fp16 = mul(x = hidden_states_815_cast_fp16, y = var_4521_cast_fp16)[name = string("normed_output_365_cast_fp16")]; + tensor const_342_to_fp16 = const()[name = string("const_342_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300526592)))]; + tensor normed_output_367_cast_fp16 = mul(x = normed_output_365_cast_fp16, y = const_342_to_fp16)[name = string("normed_output_367_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.3ap+4)]; + fp16 model_vision_tower_encoder_layers_14_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_q_proj_input_max_promoted_to_fp16"), val = 
fp16(0x1.38p+4)]; + tensor clip_196_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_367_cast_fp16)[name = string("clip_196_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300528192)))]; + tensor linear_99_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_14_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_196_cast_fp16)[name = string("linear_99_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.62p+4)]; + fp16 model_vision_tower_encoder_layers_14_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_197_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_99_cast_fp16)[name = string("clip_197_cast_fp16")]; + tensor var_4543 = const()[name = string("op_4543"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_823_cast_fp16 = reshape(shape = var_4543, x = clip_197_cast_fp16)[name = string("hidden_states_823_cast_fp16")]; + fp16 var_33_promoted_99_to_fp16 = const()[name = string("op_33_promoted_99_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4547_cast_fp16 = pow(x = hidden_states_823_cast_fp16, y = var_33_promoted_99_to_fp16)[name = 
string("op_4547_cast_fp16")]; + tensor var_4549_axes_0 = const()[name = string("op_4549_axes_0"), val = tensor([-1])]; + bool var_4549_keep_dims_0 = const()[name = string("op_4549_keep_dims_0"), val = bool(true)]; + tensor var_4549_cast_fp16 = reduce_mean(axes = var_4549_axes_0, keep_dims = var_4549_keep_dims_0, x = var_4547_cast_fp16)[name = string("op_4549_cast_fp16")]; + fp16 var_4550_to_fp16 = const()[name = string("op_4550_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_199_cast_fp16 = add(x = var_4549_cast_fp16, y = var_4550_to_fp16)[name = string("mean_squared_199_cast_fp16")]; + tensor var_4552_cast_fp16 = pow(x = mean_squared_199_cast_fp16, y = var_27_to_fp16)[name = string("op_4552_cast_fp16")]; + tensor normed_output_369_cast_fp16 = mul(x = hidden_states_823_cast_fp16, y = var_4552_cast_fp16)[name = string("normed_output_369_cast_fp16")]; + tensor const_345_to_fp16 = const()[name = string("const_345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301707904)))]; + tensor normed_output_371_cast_fp16 = mul(x = normed_output_369_cast_fp16, y = const_345_to_fp16)[name = string("normed_output_371_cast_fp16")]; + tensor var_4572 = const()[name = string("op_4572"), val = tensor([32, 32])]; + int32 var_4573_axis_0 = const()[name = string("op_4573_axis_0"), val = int32(-1)]; + tensor var_4573_cast_fp16_0, tensor var_4573_cast_fp16_1 = split(axis = var_4573_axis_0, split_sizes = var_4572, x = normed_output_371_cast_fp16)[name = string("op_4573_cast_fp16")]; + tensor var_4576 = const()[name = string("op_4576"), val = tensor([32, 32])]; + int32 var_4577_axis_0 = const()[name = string("op_4577_axis_0"), val = int32(-1)]; + tensor var_4577_0, tensor var_4577_1 = split(axis = var_4577_axis_0, split_sizes = var_4576, x = var_160_cast_fp16)[name = string("op_4577")]; + tensor var_4580 = const()[name = string("op_4580"), val = tensor([32, 32])]; + int32 var_4581_axis_0 = const()[name = string("op_4581_axis_0"), val 
= int32(-1)]; + tensor var_4581_0, tensor var_4581_1 = split(axis = var_4581_axis_0, split_sizes = var_4580, x = var_163_cast_fp16)[name = string("op_4581")]; + tensor cos_229_axes_0 = const()[name = string("cos_229_axes_0"), val = tensor([2])]; + tensor cos_229 = expand_dims(axes = cos_229_axes_0, x = var_4577_0)[name = string("cos_229")]; + tensor sin_229_axes_0 = const()[name = string("sin_229_axes_0"), val = tensor([2])]; + tensor sin_229 = expand_dims(axes = sin_229_axes_0, x = var_4581_0)[name = string("sin_229")]; + tensor var_4586_cast_fp16 = mul(x = var_4573_cast_fp16_0, y = cos_229)[name = string("op_4586_cast_fp16")]; + tensor x1_113_begin_0 = const()[name = string("x1_113_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_113_end_0 = const()[name = string("x1_113_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_113_end_mask_0 = const()[name = string("x1_113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_113_cast_fp16 = slice_by_index(begin = x1_113_begin_0, end = x1_113_end_0, end_mask = x1_113_end_mask_0, x = var_4573_cast_fp16_0)[name = string("x1_113_cast_fp16")]; + tensor x2_113_begin_0 = const()[name = string("x2_113_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_113_end_0 = const()[name = string("x2_113_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_113_end_mask_0 = const()[name = string("x2_113_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_113_cast_fp16 = slice_by_index(begin = x2_113_begin_0, end = x2_113_end_0, end_mask = x2_113_end_mask_0, x = var_4573_cast_fp16_0)[name = string("x2_113_cast_fp16")]; + fp16 const_350_promoted_to_fp16 = const()[name = string("const_350_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4597_cast_fp16 = mul(x = x2_113_cast_fp16, y = const_350_promoted_to_fp16)[name = string("op_4597_cast_fp16")]; + bool var_4599_interleave_0 = const()[name = string("op_4599_interleave_0"), val = bool(false)]; + tensor var_4599_cast_fp16 = concat(axis = var_38, 
interleave = var_4599_interleave_0, values = (var_4597_cast_fp16, x1_113_cast_fp16))[name = string("op_4599_cast_fp16")]; + tensor var_4600_cast_fp16 = mul(x = var_4599_cast_fp16, y = sin_229)[name = string("op_4600_cast_fp16")]; + tensor var_4601_cast_fp16 = add(x = var_4586_cast_fp16, y = var_4600_cast_fp16)[name = string("op_4601_cast_fp16")]; + tensor cos_233_axes_0 = const()[name = string("cos_233_axes_0"), val = tensor([2])]; + tensor cos_233 = expand_dims(axes = cos_233_axes_0, x = var_4577_1)[name = string("cos_233")]; + tensor sin_233_axes_0 = const()[name = string("sin_233_axes_0"), val = tensor([2])]; + tensor sin_233 = expand_dims(axes = sin_233_axes_0, x = var_4581_1)[name = string("sin_233")]; + tensor var_4604_cast_fp16 = mul(x = var_4573_cast_fp16_1, y = cos_233)[name = string("op_4604_cast_fp16")]; + tensor x1_115_begin_0 = const()[name = string("x1_115_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_115_end_0 = const()[name = string("x1_115_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_115_end_mask_0 = const()[name = string("x1_115_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_115_cast_fp16 = slice_by_index(begin = x1_115_begin_0, end = x1_115_end_0, end_mask = x1_115_end_mask_0, x = var_4573_cast_fp16_1)[name = string("x1_115_cast_fp16")]; + tensor x2_115_begin_0 = const()[name = string("x2_115_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_115_end_0 = const()[name = string("x2_115_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_115_end_mask_0 = const()[name = string("x2_115_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_115_cast_fp16 = slice_by_index(begin = x2_115_begin_0, end = x2_115_end_0, end_mask = x2_115_end_mask_0, x = var_4573_cast_fp16_1)[name = string("x2_115_cast_fp16")]; + fp16 const_353_promoted_to_fp16 = const()[name = string("const_353_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4615_cast_fp16 = mul(x = x2_115_cast_fp16, y = 
const_353_promoted_to_fp16)[name = string("op_4615_cast_fp16")]; + bool var_4617_interleave_0 = const()[name = string("op_4617_interleave_0"), val = bool(false)]; + tensor var_4617_cast_fp16 = concat(axis = var_38, interleave = var_4617_interleave_0, values = (var_4615_cast_fp16, x1_115_cast_fp16))[name = string("op_4617_cast_fp16")]; + tensor var_4618_cast_fp16 = mul(x = var_4617_cast_fp16, y = sin_233)[name = string("op_4618_cast_fp16")]; + tensor var_4619_cast_fp16 = add(x = var_4604_cast_fp16, y = var_4618_cast_fp16)[name = string("op_4619_cast_fp16")]; + bool query_states_29_interleave_0 = const()[name = string("query_states_29_interleave_0"), val = bool(false)]; + tensor query_states_29_cast_fp16 = concat(axis = var_38, interleave = query_states_29_interleave_0, values = (var_4601_cast_fp16, var_4619_cast_fp16))[name = string("query_states_29_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301708096)))]; + tensor linear_100_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_14_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_196_cast_fp16)[name = string("linear_100_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.7p+4)]; + fp16 model_vision_tower_encoder_layers_14_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.6ep+4)]; + tensor clip_199_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_self_attn_k_proj_output_min_promoted_to_fp16, beta = 
model_vision_tower_encoder_layers_14_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_100_cast_fp16)[name = string("clip_199_cast_fp16")]; + tensor var_4632 = const()[name = string("op_4632"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_829_cast_fp16 = reshape(shape = var_4632, x = clip_199_cast_fp16)[name = string("hidden_states_829_cast_fp16")]; + fp16 var_33_promoted_100_to_fp16 = const()[name = string("op_33_promoted_100_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4636_cast_fp16 = pow(x = hidden_states_829_cast_fp16, y = var_33_promoted_100_to_fp16)[name = string("op_4636_cast_fp16")]; + tensor var_4638_axes_0 = const()[name = string("op_4638_axes_0"), val = tensor([-1])]; + bool var_4638_keep_dims_0 = const()[name = string("op_4638_keep_dims_0"), val = bool(true)]; + tensor var_4638_cast_fp16 = reduce_mean(axes = var_4638_axes_0, keep_dims = var_4638_keep_dims_0, x = var_4636_cast_fp16)[name = string("op_4638_cast_fp16")]; + fp16 var_4639_to_fp16 = const()[name = string("op_4639_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_201_cast_fp16 = add(x = var_4638_cast_fp16, y = var_4639_to_fp16)[name = string("mean_squared_201_cast_fp16")]; + tensor var_4641_cast_fp16 = pow(x = mean_squared_201_cast_fp16, y = var_27_to_fp16)[name = string("op_4641_cast_fp16")]; + tensor normed_output_373_cast_fp16 = mul(x = hidden_states_829_cast_fp16, y = var_4641_cast_fp16)[name = string("normed_output_373_cast_fp16")]; + tensor const_354_to_fp16 = const()[name = string("const_354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302887808)))]; + tensor normed_output_375_cast_fp16 = mul(x = normed_output_373_cast_fp16, y = const_354_to_fp16)[name = string("normed_output_375_cast_fp16")]; + tensor var_4661 = const()[name = string("op_4661"), val = tensor([32, 32])]; + int32 var_4662_axis_0 = const()[name = string("op_4662_axis_0"), val = int32(-1)]; + tensor var_4662_cast_fp16_0, tensor 
var_4662_cast_fp16_1 = split(axis = var_4662_axis_0, split_sizes = var_4661, x = normed_output_375_cast_fp16)[name = string("op_4662_cast_fp16")]; + tensor var_4665 = const()[name = string("op_4665"), val = tensor([32, 32])]; + int32 var_4666_axis_0 = const()[name = string("op_4666_axis_0"), val = int32(-1)]; + tensor var_4666_0, tensor var_4666_1 = split(axis = var_4666_axis_0, split_sizes = var_4665, x = var_160_cast_fp16)[name = string("op_4666")]; + tensor var_4669 = const()[name = string("op_4669"), val = tensor([32, 32])]; + int32 var_4670_axis_0 = const()[name = string("op_4670_axis_0"), val = int32(-1)]; + tensor var_4670_0, tensor var_4670_1 = split(axis = var_4670_axis_0, split_sizes = var_4669, x = var_163_cast_fp16)[name = string("op_4670")]; + tensor cos_237_axes_0 = const()[name = string("cos_237_axes_0"), val = tensor([2])]; + tensor cos_237 = expand_dims(axes = cos_237_axes_0, x = var_4666_0)[name = string("cos_237")]; + tensor sin_237_axes_0 = const()[name = string("sin_237_axes_0"), val = tensor([2])]; + tensor sin_237 = expand_dims(axes = sin_237_axes_0, x = var_4670_0)[name = string("sin_237")]; + tensor var_4675_cast_fp16 = mul(x = var_4662_cast_fp16_0, y = cos_237)[name = string("op_4675_cast_fp16")]; + tensor x1_117_begin_0 = const()[name = string("x1_117_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_117_end_0 = const()[name = string("x1_117_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_117_end_mask_0 = const()[name = string("x1_117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_117_cast_fp16 = slice_by_index(begin = x1_117_begin_0, end = x1_117_end_0, end_mask = x1_117_end_mask_0, x = var_4662_cast_fp16_0)[name = string("x1_117_cast_fp16")]; + tensor x2_117_begin_0 = const()[name = string("x2_117_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_117_end_0 = const()[name = string("x2_117_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_117_end_mask_0 = const()[name = string("x2_117_end_mask_0"), 
val = tensor([true, true, true, true])]; + tensor x2_117_cast_fp16 = slice_by_index(begin = x2_117_begin_0, end = x2_117_end_0, end_mask = x2_117_end_mask_0, x = var_4662_cast_fp16_0)[name = string("x2_117_cast_fp16")]; + fp16 const_359_promoted_to_fp16 = const()[name = string("const_359_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4686_cast_fp16 = mul(x = x2_117_cast_fp16, y = const_359_promoted_to_fp16)[name = string("op_4686_cast_fp16")]; + bool var_4688_interleave_0 = const()[name = string("op_4688_interleave_0"), val = bool(false)]; + tensor var_4688_cast_fp16 = concat(axis = var_38, interleave = var_4688_interleave_0, values = (var_4686_cast_fp16, x1_117_cast_fp16))[name = string("op_4688_cast_fp16")]; + tensor var_4689_cast_fp16 = mul(x = var_4688_cast_fp16, y = sin_237)[name = string("op_4689_cast_fp16")]; + tensor var_4690_cast_fp16 = add(x = var_4675_cast_fp16, y = var_4689_cast_fp16)[name = string("op_4690_cast_fp16")]; + tensor cos_241_axes_0 = const()[name = string("cos_241_axes_0"), val = tensor([2])]; + tensor cos_241 = expand_dims(axes = cos_241_axes_0, x = var_4666_1)[name = string("cos_241")]; + tensor sin_241_axes_0 = const()[name = string("sin_241_axes_0"), val = tensor([2])]; + tensor sin_241 = expand_dims(axes = sin_241_axes_0, x = var_4670_1)[name = string("sin_241")]; + tensor var_4693_cast_fp16 = mul(x = var_4662_cast_fp16_1, y = cos_241)[name = string("op_4693_cast_fp16")]; + tensor x1_119_begin_0 = const()[name = string("x1_119_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_119_end_0 = const()[name = string("x1_119_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_119_end_mask_0 = const()[name = string("x1_119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_119_cast_fp16 = slice_by_index(begin = x1_119_begin_0, end = x1_119_end_0, end_mask = x1_119_end_mask_0, x = var_4662_cast_fp16_1)[name = string("x1_119_cast_fp16")]; + tensor x2_119_begin_0 = const()[name = string("x2_119_begin_0"), val = 
tensor([0, 0, 0, 16])]; + tensor x2_119_end_0 = const()[name = string("x2_119_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_119_end_mask_0 = const()[name = string("x2_119_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_119_cast_fp16 = slice_by_index(begin = x2_119_begin_0, end = x2_119_end_0, end_mask = x2_119_end_mask_0, x = var_4662_cast_fp16_1)[name = string("x2_119_cast_fp16")]; + fp16 const_362_promoted_to_fp16 = const()[name = string("const_362_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4704_cast_fp16 = mul(x = x2_119_cast_fp16, y = const_362_promoted_to_fp16)[name = string("op_4704_cast_fp16")]; + bool var_4706_interleave_0 = const()[name = string("op_4706_interleave_0"), val = bool(false)]; + tensor var_4706_cast_fp16 = concat(axis = var_38, interleave = var_4706_interleave_0, values = (var_4704_cast_fp16, x1_119_cast_fp16))[name = string("op_4706_cast_fp16")]; + tensor var_4707_cast_fp16 = mul(x = var_4706_cast_fp16, y = sin_241)[name = string("op_4707_cast_fp16")]; + tensor var_4708_cast_fp16 = add(x = var_4693_cast_fp16, y = var_4707_cast_fp16)[name = string("op_4708_cast_fp16")]; + bool key_states_29_interleave_0 = const()[name = string("key_states_29_interleave_0"), val = bool(false)]; + tensor key_states_29_cast_fp16 = concat(axis = var_38, interleave = key_states_29_interleave_0, values = (var_4690_cast_fp16, var_4708_cast_fp16))[name = string("key_states_29_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302888000)))]; + tensor linear_101_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_14_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_196_cast_fp16)[name = string("linear_101_cast_fp16")]; + fp16 
model_vision_tower_encoder_layers_14_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.7p+4)]; + fp16 model_vision_tower_encoder_layers_14_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.6ep+4)]; + tensor clip_201_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_101_cast_fp16)[name = string("clip_201_cast_fp16")]; + tensor var_4721 = const()[name = string("op_4721"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_835_cast_fp16 = reshape(shape = var_4721, x = clip_201_cast_fp16)[name = string("hidden_states_835_cast_fp16")]; + fp16 var_33_promoted_101_to_fp16 = const()[name = string("op_33_promoted_101_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4724_cast_fp16 = pow(x = hidden_states_835_cast_fp16, y = var_33_promoted_101_to_fp16)[name = string("op_4724_cast_fp16")]; + tensor var_4726_axes_0 = const()[name = string("op_4726_axes_0"), val = tensor([-1])]; + bool var_4726_keep_dims_0 = const()[name = string("op_4726_keep_dims_0"), val = bool(true)]; + tensor var_4726_cast_fp16 = reduce_mean(axes = var_4726_axes_0, keep_dims = var_4726_keep_dims_0, x = var_4724_cast_fp16)[name = string("op_4726_cast_fp16")]; + fp16 var_4727_to_fp16 = const()[name = string("op_4727_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_203_cast_fp16 = add(x = var_4726_cast_fp16, y = var_4727_to_fp16)[name = string("mean_squared_203_cast_fp16")]; + tensor var_4729_cast_fp16 = pow(x = mean_squared_203_cast_fp16, y = var_27_to_fp16)[name = string("op_4729_cast_fp16")]; + tensor normed_output_377_cast_fp16 = mul(x = hidden_states_835_cast_fp16, y = var_4729_cast_fp16)[name = 
string("normed_output_377_cast_fp16")]; + tensor hidden_states_841_perm_0 = const()[name = string("hidden_states_841_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(true)]; + bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; + tensor transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_93 = transpose(perm = transpose_93_perm_0, x = key_states_29_cast_fp16)[name = string("transpose_101")]; + tensor transpose_92 = transpose(perm = transpose_92_perm_0, x = query_states_29_cast_fp16)[name = string("transpose_102")]; + tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("matmul_14_cast_fp16")]; + tensor add_14_cast_fp16 = add(x = matmul_14_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_14_cast_fp16")]; + int32 softmax_14_axis_0 = const()[name = string("softmax_14_axis_0"), val = int32(-1)]; + tensor softmax_14_cast_fp16 = softmax(axis = softmax_14_axis_0, x = add_14_cast_fp16)[name = string("softmax_14_cast_fp16")]; + bool attn_output_57_transpose_x_0 = const()[name = string("attn_output_57_transpose_x_0"), val = bool(false)]; + bool attn_output_57_transpose_y_0 = const()[name = string("attn_output_57_transpose_y_0"), val = bool(false)]; + tensor hidden_states_841_cast_fp16 = transpose(perm = hidden_states_841_perm_0, x = normed_output_377_cast_fp16)[name = string("transpose_103")]; + tensor attn_output_57_cast_fp16 = matmul(transpose_x = attn_output_57_transpose_x_0, transpose_y = attn_output_57_transpose_y_0, x = softmax_14_cast_fp16, y = hidden_states_841_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_4734_perm_0 = 
const()[name = string("op_4734_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4736 = const()[name = string("op_4736"), val = tensor([1, 2304, -1])]; + tensor var_4734_cast_fp16 = transpose(perm = var_4734_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_100")]; + tensor var_4737_cast_fp16 = reshape(shape = var_4736, x = var_4734_cast_fp16)[name = string("op_4737_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.2cp+1)]; + fp16 model_vision_tower_encoder_layers_14_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.2ap+1)]; + tensor clip_202_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_self_attn_o_proj_input_max_promoted_to_fp16, x = var_4737_cast_fp16)[name = string("clip_202_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304067712)))]; + tensor linear_102_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_14_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_202_cast_fp16)[name = string("linear_102_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.74p+1)]; + fp16 model_vision_tower_encoder_layers_14_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_14_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.7p+1)]; + tensor clip_203_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_102_cast_fp16)[name = string("clip_203_cast_fp16")]; + fp16 var_33_promoted_102_to_fp16 = const()[name = string("op_33_promoted_102_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4750_cast_fp16 = pow(x = clip_203_cast_fp16, y = var_33_promoted_102_to_fp16)[name = string("op_4750_cast_fp16")]; + tensor var_4752_axes_0 = const()[name = string("op_4752_axes_0"), val = tensor([-1])]; + bool var_4752_keep_dims_0 = const()[name = string("op_4752_keep_dims_0"), val = bool(true)]; + tensor var_4752_cast_fp16 = reduce_mean(axes = var_4752_axes_0, keep_dims = var_4752_keep_dims_0, x = var_4750_cast_fp16)[name = string("op_4752_cast_fp16")]; + fp16 var_4753_to_fp16 = const()[name = string("op_4753_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_205_cast_fp16 = add(x = var_4752_cast_fp16, y = var_4753_to_fp16)[name = string("mean_squared_205_cast_fp16")]; + tensor var_4755_cast_fp16 = pow(x = mean_squared_205_cast_fp16, y = var_27_to_fp16)[name = string("op_4755_cast_fp16")]; + tensor normed_output_379_cast_fp16 = mul(x = clip_203_cast_fp16, y = var_4755_cast_fp16)[name = string("normed_output_379_cast_fp16")]; + tensor const_363_to_fp16 = const()[name = string("const_363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305247424)))]; + tensor normed_output_381_cast_fp16 = mul(x = normed_output_379_cast_fp16, y = const_363_to_fp16)[name = string("normed_output_381_cast_fp16")]; + tensor hidden_states_853_cast_fp16 = add(x = hidden_states_815_cast_fp16, y = normed_output_381_cast_fp16)[name = string("hidden_states_853_cast_fp16")]; + fp16 var_33_promoted_103_to_fp16 = const()[name = 
string("op_33_promoted_103_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4763_cast_fp16 = pow(x = hidden_states_853_cast_fp16, y = var_33_promoted_103_to_fp16)[name = string("op_4763_cast_fp16")]; + tensor var_4765_axes_0 = const()[name = string("op_4765_axes_0"), val = tensor([-1])]; + bool var_4765_keep_dims_0 = const()[name = string("op_4765_keep_dims_0"), val = bool(true)]; + tensor var_4765_cast_fp16 = reduce_mean(axes = var_4765_axes_0, keep_dims = var_4765_keep_dims_0, x = var_4763_cast_fp16)[name = string("op_4765_cast_fp16")]; + fp16 var_4766_to_fp16 = const()[name = string("op_4766_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_207_cast_fp16 = add(x = var_4765_cast_fp16, y = var_4766_to_fp16)[name = string("mean_squared_207_cast_fp16")]; + tensor var_4768_cast_fp16 = pow(x = mean_squared_207_cast_fp16, y = var_27_to_fp16)[name = string("op_4768_cast_fp16")]; + tensor normed_output_383_cast_fp16 = mul(x = hidden_states_853_cast_fp16, y = var_4768_cast_fp16)[name = string("normed_output_383_cast_fp16")]; + tensor const_364_to_fp16 = const()[name = string("const_364_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305249024)))]; + tensor normed_output_385_cast_fp16 = mul(x = normed_output_383_cast_fp16, y = const_364_to_fp16)[name = string("normed_output_385_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.1ap+3)]; + fp16 model_vision_tower_encoder_layers_14_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.18p+3)]; + tensor clip_204_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_mlp_gate_proj_input_max_promoted_to_fp16, x 
= normed_output_385_cast_fp16)[name = string("clip_204_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305250624)))]; + tensor linear_103_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_14_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_204_cast_fp16)[name = string("linear_103_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.28p+3)]; + fp16 model_vision_tower_encoder_layers_14_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.26p+3)]; + tensor clip_205_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_103_cast_fp16)[name = string("clip_205_cast_fp16")]; + string var_4785_mode_0 = const()[name = string("op_4785_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_4785_cast_fp16 = gelu(mode = var_4785_mode_0, x = clip_205_cast_fp16)[name = string("op_4785_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309969280)))]; + tensor linear_104_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_14_mlp_up_proj_linear_weight_promoted_to_fp16, x = 
clip_204_cast_fp16)[name = string("linear_104_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.28p+3)]; + fp16 model_vision_tower_encoder_layers_14_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.26p+3)]; + tensor clip_207_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_mlp_up_proj_output_max_promoted_to_fp16, x = linear_104_cast_fp16)[name = string("clip_207_cast_fp16")]; + tensor hidden_states_863_cast_fp16 = mul(x = var_4785_cast_fp16, y = clip_207_cast_fp16)[name = string("hidden_states_863_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.bp+3)]; + fp16 model_vision_tower_encoder_layers_14_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.aep+3)]; + tensor clip_208_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_863_cast_fp16)[name = string("clip_208_cast_fp16")]; + tensor model_vision_tower_encoder_layers_14_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314687936)))]; + tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0, weight = 
model_vision_tower_encoder_layers_14_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_208_cast_fp16)[name = string("linear_105_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_14_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.3p+2)]; + fp16 model_vision_tower_encoder_layers_14_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_14_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.2ep+2)]; + tensor clip_209_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_14_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_14_mlp_down_proj_output_max_promoted_to_fp16, x = linear_105_cast_fp16)[name = string("clip_209_cast_fp16")]; + fp16 var_33_promoted_104_to_fp16 = const()[name = string("op_33_promoted_104_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4807_cast_fp16 = pow(x = clip_209_cast_fp16, y = var_33_promoted_104_to_fp16)[name = string("op_4807_cast_fp16")]; + tensor var_4809_axes_0 = const()[name = string("op_4809_axes_0"), val = tensor([-1])]; + bool var_4809_keep_dims_0 = const()[name = string("op_4809_keep_dims_0"), val = bool(true)]; + tensor var_4809_cast_fp16 = reduce_mean(axes = var_4809_axes_0, keep_dims = var_4809_keep_dims_0, x = var_4807_cast_fp16)[name = string("op_4809_cast_fp16")]; + fp16 var_4810_to_fp16 = const()[name = string("op_4810_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_209_cast_fp16 = add(x = var_4809_cast_fp16, y = var_4810_to_fp16)[name = string("mean_squared_209_cast_fp16")]; + tensor var_4812_cast_fp16 = pow(x = mean_squared_209_cast_fp16, y = var_27_to_fp16)[name = string("op_4812_cast_fp16")]; + tensor normed_output_387_cast_fp16 = mul(x = clip_209_cast_fp16, y = var_4812_cast_fp16)[name = string("normed_output_387_cast_fp16")]; + tensor const_365_to_fp16 = const()[name = string("const_365_to_fp16"), 
val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319406592)))]; + tensor normed_output_389_cast_fp16 = mul(x = normed_output_387_cast_fp16, y = const_365_to_fp16)[name = string("normed_output_389_cast_fp16")]; + tensor hidden_states_873_cast_fp16 = add(x = hidden_states_853_cast_fp16, y = normed_output_389_cast_fp16)[name = string("hidden_states_873_cast_fp16")]; + fp16 var_33_promoted_105_to_fp16 = const()[name = string("op_33_promoted_105_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4826_cast_fp16 = pow(x = hidden_states_873_cast_fp16, y = var_33_promoted_105_to_fp16)[name = string("op_4826_cast_fp16")]; + tensor var_4828_axes_0 = const()[name = string("op_4828_axes_0"), val = tensor([-1])]; + bool var_4828_keep_dims_0 = const()[name = string("op_4828_keep_dims_0"), val = bool(true)]; + tensor var_4828_cast_fp16 = reduce_mean(axes = var_4828_axes_0, keep_dims = var_4828_keep_dims_0, x = var_4826_cast_fp16)[name = string("op_4828_cast_fp16")]; + fp16 var_4829_to_fp16 = const()[name = string("op_4829_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_211_cast_fp16 = add(x = var_4828_cast_fp16, y = var_4829_to_fp16)[name = string("mean_squared_211_cast_fp16")]; + tensor var_4831_cast_fp16 = pow(x = mean_squared_211_cast_fp16, y = var_27_to_fp16)[name = string("op_4831_cast_fp16")]; + tensor normed_output_391_cast_fp16 = mul(x = hidden_states_873_cast_fp16, y = var_4831_cast_fp16)[name = string("normed_output_391_cast_fp16")]; + tensor const_366_to_fp16 = const()[name = string("const_366_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319408192)))]; + tensor normed_output_393_cast_fp16 = mul(x = normed_output_391_cast_fp16, y = const_366_to_fp16)[name = string("normed_output_393_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_self_attn_q_proj_input_min_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_15_self_attn_q_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.4cp+4)]; + fp16 model_vision_tower_encoder_layers_15_self_attn_q_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_q_proj_input_max_promoted_to_fp16"), val = fp16(0x1.4ap+4)]; + tensor clip_210_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_self_attn_q_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_self_attn_q_proj_input_max_promoted_to_fp16, x = normed_output_393_cast_fp16)[name = string("clip_210_cast_fp16")]; + tensor model_vision_tower_encoder_layers_15_self_attn_q_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_q_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319409792)))]; + tensor linear_106_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_15_self_attn_q_proj_linear_weight_promoted_to_fp16, x = clip_210_cast_fp16)[name = string("linear_106_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_self_attn_q_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_q_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.6p+4)]; + fp16 model_vision_tower_encoder_layers_15_self_attn_q_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_q_proj_output_max_promoted_to_fp16"), val = fp16(0x1.5ep+4)]; + tensor clip_211_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_self_attn_q_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_self_attn_q_proj_output_max_promoted_to_fp16, x = linear_106_cast_fp16)[name = string("clip_211_cast_fp16")]; + tensor var_4853 = const()[name = string("op_4853"), val = tensor([1, 2304, -1, 64])]; + tensor 
hidden_states_881_cast_fp16 = reshape(shape = var_4853, x = clip_211_cast_fp16)[name = string("hidden_states_881_cast_fp16")]; + fp16 var_33_promoted_106_to_fp16 = const()[name = string("op_33_promoted_106_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4857_cast_fp16 = pow(x = hidden_states_881_cast_fp16, y = var_33_promoted_106_to_fp16)[name = string("op_4857_cast_fp16")]; + tensor var_4859_axes_0 = const()[name = string("op_4859_axes_0"), val = tensor([-1])]; + bool var_4859_keep_dims_0 = const()[name = string("op_4859_keep_dims_0"), val = bool(true)]; + tensor var_4859_cast_fp16 = reduce_mean(axes = var_4859_axes_0, keep_dims = var_4859_keep_dims_0, x = var_4857_cast_fp16)[name = string("op_4859_cast_fp16")]; + fp16 var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_213_cast_fp16 = add(x = var_4859_cast_fp16, y = var_4860_to_fp16)[name = string("mean_squared_213_cast_fp16")]; + tensor var_4862_cast_fp16 = pow(x = mean_squared_213_cast_fp16, y = var_27_to_fp16)[name = string("op_4862_cast_fp16")]; + tensor normed_output_395_cast_fp16 = mul(x = hidden_states_881_cast_fp16, y = var_4862_cast_fp16)[name = string("normed_output_395_cast_fp16")]; + tensor const_369_to_fp16 = const()[name = string("const_369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320589504)))]; + tensor normed_output_397_cast_fp16 = mul(x = normed_output_395_cast_fp16, y = const_369_to_fp16)[name = string("normed_output_397_cast_fp16")]; + tensor var_4882 = const()[name = string("op_4882"), val = tensor([32, 32])]; + int32 var_4883_axis_0 = const()[name = string("op_4883_axis_0"), val = int32(-1)]; + tensor var_4883_cast_fp16_0, tensor var_4883_cast_fp16_1 = split(axis = var_4883_axis_0, split_sizes = var_4882, x = normed_output_397_cast_fp16)[name = string("op_4883_cast_fp16")]; + tensor var_4886 = const()[name = string("op_4886"), val = tensor([32, 32])]; + int32 var_4887_axis_0 = 
const()[name = string("op_4887_axis_0"), val = int32(-1)]; + tensor var_4887_0, tensor var_4887_1 = split(axis = var_4887_axis_0, split_sizes = var_4886, x = var_160_cast_fp16)[name = string("op_4887")]; + tensor var_4890 = const()[name = string("op_4890"), val = tensor([32, 32])]; + int32 var_4891_axis_0 = const()[name = string("op_4891_axis_0"), val = int32(-1)]; + tensor var_4891_0, tensor var_4891_1 = split(axis = var_4891_axis_0, split_sizes = var_4890, x = var_163_cast_fp16)[name = string("op_4891")]; + tensor cos_245_axes_0 = const()[name = string("cos_245_axes_0"), val = tensor([2])]; + tensor cos_245 = expand_dims(axes = cos_245_axes_0, x = var_4887_0)[name = string("cos_245")]; + tensor sin_245_axes_0 = const()[name = string("sin_245_axes_0"), val = tensor([2])]; + tensor sin_245 = expand_dims(axes = sin_245_axes_0, x = var_4891_0)[name = string("sin_245")]; + tensor var_4896_cast_fp16 = mul(x = var_4883_cast_fp16_0, y = cos_245)[name = string("op_4896_cast_fp16")]; + tensor x1_121_begin_0 = const()[name = string("x1_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_121_end_0 = const()[name = string("x1_121_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_121_end_mask_0 = const()[name = string("x1_121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_121_cast_fp16 = slice_by_index(begin = x1_121_begin_0, end = x1_121_end_0, end_mask = x1_121_end_mask_0, x = var_4883_cast_fp16_0)[name = string("x1_121_cast_fp16")]; + tensor x2_121_begin_0 = const()[name = string("x2_121_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_121_end_0 = const()[name = string("x2_121_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_121_end_mask_0 = const()[name = string("x2_121_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_121_cast_fp16 = slice_by_index(begin = x2_121_begin_0, end = x2_121_end_0, end_mask = x2_121_end_mask_0, x = var_4883_cast_fp16_0)[name = string("x2_121_cast_fp16")]; + fp16 const_374_promoted_to_fp16 
= const()[name = string("const_374_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4907_cast_fp16 = mul(x = x2_121_cast_fp16, y = const_374_promoted_to_fp16)[name = string("op_4907_cast_fp16")]; + bool var_4909_interleave_0 = const()[name = string("op_4909_interleave_0"), val = bool(false)]; + tensor var_4909_cast_fp16 = concat(axis = var_38, interleave = var_4909_interleave_0, values = (var_4907_cast_fp16, x1_121_cast_fp16))[name = string("op_4909_cast_fp16")]; + tensor var_4910_cast_fp16 = mul(x = var_4909_cast_fp16, y = sin_245)[name = string("op_4910_cast_fp16")]; + tensor var_4911_cast_fp16 = add(x = var_4896_cast_fp16, y = var_4910_cast_fp16)[name = string("op_4911_cast_fp16")]; + tensor cos_249_axes_0 = const()[name = string("cos_249_axes_0"), val = tensor([2])]; + tensor cos_249 = expand_dims(axes = cos_249_axes_0, x = var_4887_1)[name = string("cos_249")]; + tensor sin_249_axes_0 = const()[name = string("sin_249_axes_0"), val = tensor([2])]; + tensor sin_249 = expand_dims(axes = sin_249_axes_0, x = var_4891_1)[name = string("sin_249")]; + tensor var_4914_cast_fp16 = mul(x = var_4883_cast_fp16_1, y = cos_249)[name = string("op_4914_cast_fp16")]; + tensor x1_123_begin_0 = const()[name = string("x1_123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_123_end_0 = const()[name = string("x1_123_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_123_end_mask_0 = const()[name = string("x1_123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_123_cast_fp16 = slice_by_index(begin = x1_123_begin_0, end = x1_123_end_0, end_mask = x1_123_end_mask_0, x = var_4883_cast_fp16_1)[name = string("x1_123_cast_fp16")]; + tensor x2_123_begin_0 = const()[name = string("x2_123_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_123_end_0 = const()[name = string("x2_123_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_123_end_mask_0 = const()[name = string("x2_123_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_123_cast_fp16 
= slice_by_index(begin = x2_123_begin_0, end = x2_123_end_0, end_mask = x2_123_end_mask_0, x = var_4883_cast_fp16_1)[name = string("x2_123_cast_fp16")]; + fp16 const_377_promoted_to_fp16 = const()[name = string("const_377_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4925_cast_fp16 = mul(x = x2_123_cast_fp16, y = const_377_promoted_to_fp16)[name = string("op_4925_cast_fp16")]; + bool var_4927_interleave_0 = const()[name = string("op_4927_interleave_0"), val = bool(false)]; + tensor var_4927_cast_fp16 = concat(axis = var_38, interleave = var_4927_interleave_0, values = (var_4925_cast_fp16, x1_123_cast_fp16))[name = string("op_4927_cast_fp16")]; + tensor var_4928_cast_fp16 = mul(x = var_4927_cast_fp16, y = sin_249)[name = string("op_4928_cast_fp16")]; + tensor var_4929_cast_fp16 = add(x = var_4914_cast_fp16, y = var_4928_cast_fp16)[name = string("op_4929_cast_fp16")]; + bool query_states_interleave_0 = const()[name = string("query_states_interleave_0"), val = bool(false)]; + tensor query_states_cast_fp16 = concat(axis = var_38, interleave = query_states_interleave_0, values = (var_4911_cast_fp16, var_4929_cast_fp16))[name = string("query_states_cast_fp16")]; + tensor model_vision_tower_encoder_layers_15_self_attn_k_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_k_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320589696)))]; + tensor linear_107_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_15_self_attn_k_proj_linear_weight_promoted_to_fp16, x = clip_210_cast_fp16)[name = string("linear_107_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_self_attn_k_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_k_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.9p+4)]; + fp16 
model_vision_tower_encoder_layers_15_self_attn_k_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_k_proj_output_max_promoted_to_fp16"), val = fp16(0x1.8ep+4)]; + tensor clip_213_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_self_attn_k_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_self_attn_k_proj_output_max_promoted_to_fp16, x = linear_107_cast_fp16)[name = string("clip_213_cast_fp16")]; + tensor var_4942 = const()[name = string("op_4942"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_887_cast_fp16 = reshape(shape = var_4942, x = clip_213_cast_fp16)[name = string("hidden_states_887_cast_fp16")]; + fp16 var_33_promoted_107_to_fp16 = const()[name = string("op_33_promoted_107_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4946_cast_fp16 = pow(x = hidden_states_887_cast_fp16, y = var_33_promoted_107_to_fp16)[name = string("op_4946_cast_fp16")]; + tensor var_4948_axes_0 = const()[name = string("op_4948_axes_0"), val = tensor([-1])]; + bool var_4948_keep_dims_0 = const()[name = string("op_4948_keep_dims_0"), val = bool(true)]; + tensor var_4948_cast_fp16 = reduce_mean(axes = var_4948_axes_0, keep_dims = var_4948_keep_dims_0, x = var_4946_cast_fp16)[name = string("op_4948_cast_fp16")]; + fp16 var_4949_to_fp16 = const()[name = string("op_4949_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_215_cast_fp16 = add(x = var_4948_cast_fp16, y = var_4949_to_fp16)[name = string("mean_squared_215_cast_fp16")]; + tensor var_4951_cast_fp16 = pow(x = mean_squared_215_cast_fp16, y = var_27_to_fp16)[name = string("op_4951_cast_fp16")]; + tensor normed_output_399_cast_fp16 = mul(x = hidden_states_887_cast_fp16, y = var_4951_cast_fp16)[name = string("normed_output_399_cast_fp16")]; + tensor const_378_to_fp16 = const()[name = string("const_378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321769408)))]; + tensor 
normed_output_401_cast_fp16 = mul(x = normed_output_399_cast_fp16, y = const_378_to_fp16)[name = string("normed_output_401_cast_fp16")]; + tensor var_4971 = const()[name = string("op_4971"), val = tensor([32, 32])]; + int32 var_4972_axis_0 = const()[name = string("op_4972_axis_0"), val = int32(-1)]; + tensor var_4972_cast_fp16_0, tensor var_4972_cast_fp16_1 = split(axis = var_4972_axis_0, split_sizes = var_4971, x = normed_output_401_cast_fp16)[name = string("op_4972_cast_fp16")]; + tensor var_4975 = const()[name = string("op_4975"), val = tensor([32, 32])]; + int32 var_4976_axis_0 = const()[name = string("op_4976_axis_0"), val = int32(-1)]; + tensor var_4976_0, tensor var_4976_1 = split(axis = var_4976_axis_0, split_sizes = var_4975, x = var_160_cast_fp16)[name = string("op_4976")]; + tensor var_4979 = const()[name = string("op_4979"), val = tensor([32, 32])]; + int32 var_4980_axis_0 = const()[name = string("op_4980_axis_0"), val = int32(-1)]; + tensor var_4980_0, tensor var_4980_1 = split(axis = var_4980_axis_0, split_sizes = var_4979, x = var_163_cast_fp16)[name = string("op_4980")]; + tensor cos_253_axes_0 = const()[name = string("cos_253_axes_0"), val = tensor([2])]; + tensor cos_253 = expand_dims(axes = cos_253_axes_0, x = var_4976_0)[name = string("cos_253")]; + tensor sin_253_axes_0 = const()[name = string("sin_253_axes_0"), val = tensor([2])]; + tensor sin_253 = expand_dims(axes = sin_253_axes_0, x = var_4980_0)[name = string("sin_253")]; + tensor var_4985_cast_fp16 = mul(x = var_4972_cast_fp16_0, y = cos_253)[name = string("op_4985_cast_fp16")]; + tensor x1_125_begin_0 = const()[name = string("x1_125_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_125_end_0 = const()[name = string("x1_125_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_125_end_mask_0 = const()[name = string("x1_125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_125_cast_fp16 = slice_by_index(begin = x1_125_begin_0, end = x1_125_end_0, end_mask = 
x1_125_end_mask_0, x = var_4972_cast_fp16_0)[name = string("x1_125_cast_fp16")]; + tensor x2_125_begin_0 = const()[name = string("x2_125_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_125_end_0 = const()[name = string("x2_125_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_125_end_mask_0 = const()[name = string("x2_125_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_125_cast_fp16 = slice_by_index(begin = x2_125_begin_0, end = x2_125_end_0, end_mask = x2_125_end_mask_0, x = var_4972_cast_fp16_0)[name = string("x2_125_cast_fp16")]; + fp16 const_383_promoted_to_fp16 = const()[name = string("const_383_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4996_cast_fp16 = mul(x = x2_125_cast_fp16, y = const_383_promoted_to_fp16)[name = string("op_4996_cast_fp16")]; + bool var_4998_interleave_0 = const()[name = string("op_4998_interleave_0"), val = bool(false)]; + tensor var_4998_cast_fp16 = concat(axis = var_38, interleave = var_4998_interleave_0, values = (var_4996_cast_fp16, x1_125_cast_fp16))[name = string("op_4998_cast_fp16")]; + tensor var_4999_cast_fp16 = mul(x = var_4998_cast_fp16, y = sin_253)[name = string("op_4999_cast_fp16")]; + tensor var_5000_cast_fp16 = add(x = var_4985_cast_fp16, y = var_4999_cast_fp16)[name = string("op_5000_cast_fp16")]; + tensor cos_axes_0 = const()[name = string("cos_axes_0"), val = tensor([2])]; + tensor cos = expand_dims(axes = cos_axes_0, x = var_4976_1)[name = string("cos")]; + tensor sin_axes_0 = const()[name = string("sin_axes_0"), val = tensor([2])]; + tensor sin = expand_dims(axes = sin_axes_0, x = var_4980_1)[name = string("sin")]; + tensor var_5003_cast_fp16 = mul(x = var_4972_cast_fp16_1, y = cos)[name = string("op_5003_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 2304, 12, 16])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, 
true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_4972_cast_fp16_1)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 16])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 2304, 12, 32])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_4972_cast_fp16_1)[name = string("x2_cast_fp16")]; + fp16 const_386_promoted_to_fp16 = const()[name = string("const_386_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5014_cast_fp16 = mul(x = x2_cast_fp16, y = const_386_promoted_to_fp16)[name = string("op_5014_cast_fp16")]; + bool var_5016_interleave_0 = const()[name = string("op_5016_interleave_0"), val = bool(false)]; + tensor var_5016_cast_fp16 = concat(axis = var_38, interleave = var_5016_interleave_0, values = (var_5014_cast_fp16, x1_cast_fp16))[name = string("op_5016_cast_fp16")]; + tensor var_5017_cast_fp16 = mul(x = var_5016_cast_fp16, y = sin)[name = string("op_5017_cast_fp16")]; + tensor var_5018_cast_fp16 = add(x = var_5003_cast_fp16, y = var_5017_cast_fp16)[name = string("op_5018_cast_fp16")]; + bool key_states_interleave_0 = const()[name = string("key_states_interleave_0"), val = bool(false)]; + tensor key_states_cast_fp16 = concat(axis = var_38, interleave = key_states_interleave_0, values = (var_5000_cast_fp16, var_5018_cast_fp16))[name = string("key_states_cast_fp16")]; + tensor model_vision_tower_encoder_layers_15_self_attn_v_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_v_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321769600)))]; + tensor linear_108_cast_fp16 = linear(bias = 
linear_0_bias_0, weight = model_vision_tower_encoder_layers_15_self_attn_v_proj_linear_weight_promoted_to_fp16, x = clip_210_cast_fp16)[name = string("linear_108_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_self_attn_v_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_v_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.9p+4)]; + fp16 model_vision_tower_encoder_layers_15_self_attn_v_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_v_proj_output_max_promoted_to_fp16"), val = fp16(0x1.8ep+4)]; + tensor clip_215_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_self_attn_v_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_self_attn_v_proj_output_max_promoted_to_fp16, x = linear_108_cast_fp16)[name = string("clip_215_cast_fp16")]; + tensor var_5031 = const()[name = string("op_5031"), val = tensor([1, 2304, -1, 64])]; + tensor hidden_states_893_cast_fp16 = reshape(shape = var_5031, x = clip_215_cast_fp16)[name = string("hidden_states_893_cast_fp16")]; + fp16 var_33_promoted_108_to_fp16 = const()[name = string("op_33_promoted_108_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5034_cast_fp16 = pow(x = hidden_states_893_cast_fp16, y = var_33_promoted_108_to_fp16)[name = string("op_5034_cast_fp16")]; + tensor var_5036_axes_0 = const()[name = string("op_5036_axes_0"), val = tensor([-1])]; + bool var_5036_keep_dims_0 = const()[name = string("op_5036_keep_dims_0"), val = bool(true)]; + tensor var_5036_cast_fp16 = reduce_mean(axes = var_5036_axes_0, keep_dims = var_5036_keep_dims_0, x = var_5034_cast_fp16)[name = string("op_5036_cast_fp16")]; + fp16 var_5037_to_fp16 = const()[name = string("op_5037_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_217_cast_fp16 = add(x = var_5036_cast_fp16, y = var_5037_to_fp16)[name = string("mean_squared_217_cast_fp16")]; + tensor var_5039_cast_fp16 = pow(x = 
mean_squared_217_cast_fp16, y = var_27_to_fp16)[name = string("op_5039_cast_fp16")]; + tensor normed_output_403_cast_fp16 = mul(x = hidden_states_893_cast_fp16, y = var_5039_cast_fp16)[name = string("normed_output_403_cast_fp16")]; + tensor hidden_states_899_perm_0 = const()[name = string("hidden_states_899_perm_0"), val = tensor([0, 2, 1, 3])]; + bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(true)]; + bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; + tensor transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_95 = transpose(perm = transpose_95_perm_0, x = key_states_cast_fp16)[name = string("transpose_97")]; + tensor transpose_94 = transpose(perm = transpose_94_perm_0, x = query_states_cast_fp16)[name = string("transpose_98")]; + tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("matmul_15_cast_fp16")]; + tensor add_15_cast_fp16 = add(x = matmul_15_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_15_cast_fp16")]; + int32 softmax_15_axis_0 = const()[name = string("softmax_15_axis_0"), val = int32(-1)]; + tensor softmax_15_cast_fp16 = softmax(axis = softmax_15_axis_0, x = add_15_cast_fp16)[name = string("softmax_15_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor hidden_states_899_cast_fp16 = transpose(perm = hidden_states_899_perm_0, x = normed_output_403_cast_fp16)[name = string("transpose_99")]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = 
attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = softmax_15_cast_fp16, y = hidden_states_899_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_5044_perm_0 = const()[name = string("op_5044_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5046 = const()[name = string("op_5046"), val = tensor([1, 2304, -1])]; + tensor var_5044_cast_fp16 = transpose(perm = var_5044_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_96")]; + tensor var_5047_cast_fp16 = reshape(shape = var_5046, x = var_5044_cast_fp16)[name = string("op_5047_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_self_attn_o_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_o_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.22p+1)]; + fp16 model_vision_tower_encoder_layers_15_self_attn_o_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_o_proj_input_max_promoted_to_fp16"), val = fp16(0x1.2p+1)]; + tensor clip_216_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_self_attn_o_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_self_attn_o_proj_input_max_promoted_to_fp16, x = var_5047_cast_fp16)[name = string("clip_216_cast_fp16")]; + tensor model_vision_tower_encoder_layers_15_self_attn_o_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_o_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322949312)))]; + tensor linear_109_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_15_self_attn_o_proj_linear_weight_promoted_to_fp16, x = clip_216_cast_fp16)[name = string("linear_109_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_self_attn_o_proj_output_min_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_15_self_attn_o_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.d4p+1)]; + fp16 model_vision_tower_encoder_layers_15_self_attn_o_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_self_attn_o_proj_output_max_promoted_to_fp16"), val = fp16(0x1.d2p+1)]; + tensor clip_217_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_self_attn_o_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_self_attn_o_proj_output_max_promoted_to_fp16, x = linear_109_cast_fp16)[name = string("clip_217_cast_fp16")]; + fp16 var_33_promoted_109_to_fp16 = const()[name = string("op_33_promoted_109_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5060_cast_fp16 = pow(x = clip_217_cast_fp16, y = var_33_promoted_109_to_fp16)[name = string("op_5060_cast_fp16")]; + tensor var_5062_axes_0 = const()[name = string("op_5062_axes_0"), val = tensor([-1])]; + bool var_5062_keep_dims_0 = const()[name = string("op_5062_keep_dims_0"), val = bool(true)]; + tensor var_5062_cast_fp16 = reduce_mean(axes = var_5062_axes_0, keep_dims = var_5062_keep_dims_0, x = var_5060_cast_fp16)[name = string("op_5062_cast_fp16")]; + fp16 var_5063_to_fp16 = const()[name = string("op_5063_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_219_cast_fp16 = add(x = var_5062_cast_fp16, y = var_5063_to_fp16)[name = string("mean_squared_219_cast_fp16")]; + tensor var_5065_cast_fp16 = pow(x = mean_squared_219_cast_fp16, y = var_27_to_fp16)[name = string("op_5065_cast_fp16")]; + tensor normed_output_405_cast_fp16 = mul(x = clip_217_cast_fp16, y = var_5065_cast_fp16)[name = string("normed_output_405_cast_fp16")]; + tensor const_387_to_fp16 = const()[name = string("const_387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324129024)))]; + tensor normed_output_407_cast_fp16 = mul(x = normed_output_405_cast_fp16, y = const_387_to_fp16)[name = 
string("normed_output_407_cast_fp16")]; + tensor hidden_states_911_cast_fp16 = add(x = hidden_states_873_cast_fp16, y = normed_output_407_cast_fp16)[name = string("hidden_states_911_cast_fp16")]; + fp16 var_33_promoted_110_to_fp16 = const()[name = string("op_33_promoted_110_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5073_cast_fp16 = pow(x = hidden_states_911_cast_fp16, y = var_33_promoted_110_to_fp16)[name = string("op_5073_cast_fp16")]; + tensor var_5075_axes_0 = const()[name = string("op_5075_axes_0"), val = tensor([-1])]; + bool var_5075_keep_dims_0 = const()[name = string("op_5075_keep_dims_0"), val = bool(true)]; + tensor var_5075_cast_fp16 = reduce_mean(axes = var_5075_axes_0, keep_dims = var_5075_keep_dims_0, x = var_5073_cast_fp16)[name = string("op_5075_cast_fp16")]; + fp16 var_5076_to_fp16 = const()[name = string("op_5076_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_221_cast_fp16 = add(x = var_5075_cast_fp16, y = var_5076_to_fp16)[name = string("mean_squared_221_cast_fp16")]; + tensor var_5078_cast_fp16 = pow(x = mean_squared_221_cast_fp16, y = var_27_to_fp16)[name = string("op_5078_cast_fp16")]; + tensor normed_output_409_cast_fp16 = mul(x = hidden_states_911_cast_fp16, y = var_5078_cast_fp16)[name = string("normed_output_409_cast_fp16")]; + tensor const_388_to_fp16 = const()[name = string("const_388_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324130624)))]; + tensor normed_output_411_cast_fp16 = mul(x = normed_output_409_cast_fp16, y = const_388_to_fp16)[name = string("normed_output_411_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_mlp_gate_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_gate_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.fap+2)]; + fp16 model_vision_tower_encoder_layers_15_mlp_gate_proj_input_max_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_15_mlp_gate_proj_input_max_promoted_to_fp16"), val = fp16(0x1.f6p+2)]; + tensor clip_218_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_mlp_gate_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_mlp_gate_proj_input_max_promoted_to_fp16, x = normed_output_411_cast_fp16)[name = string("clip_218_cast_fp16")]; + tensor model_vision_tower_encoder_layers_15_mlp_gate_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_gate_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324132224)))]; + tensor linear_110_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_15_mlp_gate_proj_linear_weight_promoted_to_fp16, x = clip_218_cast_fp16)[name = string("linear_110_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_mlp_gate_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_gate_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.18p+3)]; + fp16 model_vision_tower_encoder_layers_15_mlp_gate_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_gate_proj_output_max_promoted_to_fp16"), val = fp16(0x1.16p+3)]; + tensor clip_219_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_mlp_gate_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_mlp_gate_proj_output_max_promoted_to_fp16, x = linear_110_cast_fp16)[name = string("clip_219_cast_fp16")]; + string var_5095_mode_0 = const()[name = string("op_5095_mode_0"), val = string("TANH_APPROXIMATION")]; + tensor var_5095_cast_fp16 = gelu(mode = var_5095_mode_0, x = clip_219_cast_fp16)[name = string("op_5095_cast_fp16")]; + tensor model_vision_tower_encoder_layers_15_mlp_up_proj_linear_weight_promoted_to_fp16 = const()[name = 
string("model_vision_tower_encoder_layers_15_mlp_up_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328850880)))]; + tensor linear_111_cast_fp16 = linear(bias = linear_5_bias_0_to_fp16, weight = model_vision_tower_encoder_layers_15_mlp_up_proj_linear_weight_promoted_to_fp16, x = clip_218_cast_fp16)[name = string("linear_111_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_mlp_up_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_up_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.18p+3)]; + fp16 model_vision_tower_encoder_layers_15_mlp_up_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_up_proj_output_max_promoted_to_fp16"), val = fp16(0x1.16p+3)]; + tensor clip_221_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_mlp_up_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_mlp_up_proj_output_max_promoted_to_fp16, x = linear_111_cast_fp16)[name = string("clip_221_cast_fp16")]; + tensor hidden_states_921_cast_fp16 = mul(x = var_5095_cast_fp16, y = clip_221_cast_fp16)[name = string("hidden_states_921_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_mlp_down_proj_input_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_down_proj_input_min_promoted_to_fp16"), val = fp16(-0x1.82p+3)]; + fp16 model_vision_tower_encoder_layers_15_mlp_down_proj_input_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_down_proj_input_max_promoted_to_fp16"), val = fp16(0x1.7ep+3)]; + tensor clip_222_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_mlp_down_proj_input_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_mlp_down_proj_input_max_promoted_to_fp16, x = hidden_states_921_cast_fp16)[name = string("clip_222_cast_fp16")]; + tensor 
model_vision_tower_encoder_layers_15_mlp_down_proj_linear_weight_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_down_proj_linear_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333569536)))]; + tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0, weight = model_vision_tower_encoder_layers_15_mlp_down_proj_linear_weight_promoted_to_fp16, x = clip_222_cast_fp16)[name = string("linear_112_cast_fp16")]; + fp16 model_vision_tower_encoder_layers_15_mlp_down_proj_output_min_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_down_proj_output_min_promoted_to_fp16"), val = fp16(-0x1.e8p+1)]; + fp16 model_vision_tower_encoder_layers_15_mlp_down_proj_output_max_promoted_to_fp16 = const()[name = string("model_vision_tower_encoder_layers_15_mlp_down_proj_output_max_promoted_to_fp16"), val = fp16(0x1.e4p+1)]; + tensor clip_223_cast_fp16 = clip(alpha = model_vision_tower_encoder_layers_15_mlp_down_proj_output_min_promoted_to_fp16, beta = model_vision_tower_encoder_layers_15_mlp_down_proj_output_max_promoted_to_fp16, x = linear_112_cast_fp16)[name = string("clip_223_cast_fp16")]; + fp16 var_33_promoted_111_to_fp16 = const()[name = string("op_33_promoted_111_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5117_cast_fp16 = pow(x = clip_223_cast_fp16, y = var_33_promoted_111_to_fp16)[name = string("op_5117_cast_fp16")]; + tensor var_5119_axes_0 = const()[name = string("op_5119_axes_0"), val = tensor([-1])]; + bool var_5119_keep_dims_0 = const()[name = string("op_5119_keep_dims_0"), val = bool(true)]; + tensor var_5119_cast_fp16 = reduce_mean(axes = var_5119_axes_0, keep_dims = var_5119_keep_dims_0, x = var_5117_cast_fp16)[name = string("op_5119_cast_fp16")]; + fp16 var_5120_to_fp16 = const()[name = string("op_5120_to_fp16"), val = fp16(0x1.1p-20)]; + tensor mean_squared_cast_fp16 = add(x = var_5119_cast_fp16, y = var_5120_to_fp16)[name = 
string("mean_squared_cast_fp16")]; + tensor var_5122_cast_fp16 = pow(x = mean_squared_cast_fp16, y = var_27_to_fp16)[name = string("op_5122_cast_fp16")]; + tensor normed_output_413_cast_fp16 = mul(x = clip_223_cast_fp16, y = var_5122_cast_fp16)[name = string("normed_output_413_cast_fp16")]; + tensor const_389_to_fp16 = const()[name = string("const_389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338288192)))]; + tensor normed_output_cast_fp16 = mul(x = normed_output_413_cast_fp16, y = const_389_to_fp16)[name = string("normed_output_cast_fp16")]; + tensor hidden_states_931_cast_fp16 = add(x = hidden_states_911_cast_fp16, y = normed_output_cast_fp16)[name = string("hidden_states_931_cast_fp16")]; + fp16 var_36_to_fp16 = const()[name = string("op_36_to_fp16"), val = fp16(0x0p+0)]; + tensor hidden_states_933_cast_fp16 = select(a = var_36_to_fp16, b = hidden_states_931_cast_fp16, cond = var_66)[name = string("hidden_states_933_cast_fp16")]; + tensor var_5131_begin_0 = const()[name = string("op_5131_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5131_end_0 = const()[name = string("op_5131_end_0"), val = tensor([1, 2304, 1])]; + tensor var_5131_end_mask_0 = const()[name = string("op_5131_end_mask_0"), val = tensor([true, true, false])]; + tensor var_5131_squeeze_mask_0 = const()[name = string("op_5131_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor var_5131 = slice_by_index(begin = var_5131_begin_0, end = var_5131_end_0, end_mask = var_5131_end_mask_0, squeeze_mask = var_5131_squeeze_mask_0, x = clamped_positions_1)[name = string("op_5131")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = var_5131)[name = string("reduce_max_0")]; + int32 var_5134 
= const()[name = string("op_5134"), val = int32(1)]; + tensor var_5135 = add(x = reduce_max_0, y = var_5134)[name = string("op_5135")]; + string cast_66_to_fp16_dtype_0 = const()[name = string("cast_66_to_fp16_dtype_0"), val = string("fp16")]; + fp16 cast_67_to_fp16 = const()[name = string("cast_67_to_fp16"), val = fp16(0x1.8p+1)]; + tensor clamped_positions_1_to_fp16 = cast(dtype = cast_66_to_fp16_dtype_0, x = clamped_positions_1)[name = string("cast_74")]; + tensor kernel_idxs_1_cast_fp16 = floor_div(x = clamped_positions_1_to_fp16, y = cast_67_to_fp16)[name = string("kernel_idxs_1_cast_fp16")]; + tensor var_5137_begin_0 = const()[name = string("op_5137_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5137_end_0 = const()[name = string("op_5137_end_0"), val = tensor([1, 2304, 1])]; + tensor var_5137_end_mask_0 = const()[name = string("op_5137_end_mask_0"), val = tensor([true, true, false])]; + tensor var_5137_squeeze_mask_0 = const()[name = string("op_5137_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor var_5137_cast_fp16 = slice_by_index(begin = var_5137_begin_0, end = var_5137_end_0, end_mask = var_5137_end_mask_0, squeeze_mask = var_5137_squeeze_mask_0, x = kernel_idxs_1_cast_fp16)[name = string("op_5137_cast_fp16")]; + tensor floor_div_161 = floor_div(x = var_5135, y = var_17)[name = string("floor_div_161")]; + tensor var_5139_begin_0 = const()[name = string("op_5139_begin_0"), val = tensor([0, 0, 1])]; + tensor var_5139_end_0 = const()[name = string("op_5139_end_0"), val = tensor([1, 2304, 2])]; + tensor var_5139_end_mask_0 = const()[name = string("op_5139_end_mask_0"), val = tensor([true, true, false])]; + tensor var_5139_squeeze_mask_0 = const()[name = string("op_5139_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor var_5139_cast_fp16 = slice_by_index(begin = var_5139_begin_0, end = var_5139_end_0, end_mask = var_5139_end_mask_0, squeeze_mask = var_5139_squeeze_mask_0, x = kernel_idxs_1_cast_fp16)[name = 
string("op_5139_cast_fp16")]; + string var_5138_to_fp16_dtype_0 = const()[name = string("op_5138_to_fp16_dtype_0"), val = string("fp16")]; + tensor floor_div_161_to_fp16 = cast(dtype = var_5138_to_fp16_dtype_0, x = floor_div_161)[name = string("cast_73")]; + tensor var_5140_cast_fp16 = mul(x = floor_div_161_to_fp16, y = var_5139_cast_fp16)[name = string("op_5140_cast_fp16")]; + tensor kernel_idxs_3_cast_fp16 = add(x = var_5137_cast_fp16, y = var_5140_cast_fp16)[name = string("kernel_idxs_3_cast_fp16")]; + string kernel_idxs_dtype_0 = const()[name = string("kernel_idxs_dtype_0"), val = string("int32")]; + int32 var_5143_one_hot_vector_size_0 = const()[name = string("op_5143_one_hot_vector_size_0"), val = int32(256)]; + int32 var_5143_axis_0 = const()[name = string("op_5143_axis_0"), val = int32(-1)]; + int32 var_5143_on_value_0 = const()[name = string("op_5143_on_value_0"), val = int32(1)]; + int32 var_5143_off_value_0 = const()[name = string("op_5143_off_value_0"), val = int32(0)]; + tensor kernel_idxs_3_cast_fp16_to_int32 = cast(dtype = kernel_idxs_dtype_0, x = kernel_idxs_3_cast_fp16)[name = string("cast_72")]; + tensor var_5143 = one_hot(axis = var_5143_axis_0, indices = kernel_idxs_3_cast_fp16_to_int32, off_value = var_5143_off_value_0, on_value = var_5143_on_value_0, one_hot_vector_size = var_5143_one_hot_vector_size_0)[name = string("op_5143")]; + string var_5144_to_fp16_dtype_0 = const()[name = string("op_5144_to_fp16_dtype_0"), val = string("fp16")]; + fp16 _inversed_weights_y_0_to_fp16 = const()[name = string("_inversed_weights_y_0_to_fp16"), val = fp16(0x1.c7p-4)]; + tensor var_5143_to_fp16 = cast(dtype = var_5144_to_fp16_dtype_0, x = var_5143)[name = string("cast_71")]; + tensor _inversed_weights_cast_fp16 = mul(x = var_5143_to_fp16, y = _inversed_weights_y_0_to_fp16)[name = string("_inversed_weights_cast_fp16")]; + bool output_transpose_x_1 = const()[name = string("output_transpose_x_1"), val = bool(true)]; + bool output_transpose_y_1 = const()[name = 
string("output_transpose_y_1"), val = bool(false)]; + tensor output_cast_fp16 = matmul(transpose_x = output_transpose_x_1, transpose_y = output_transpose_y_1, x = _inversed_weights_cast_fp16, y = hidden_states_933_cast_fp16)[name = string("output_cast_fp16")]; + fp16 var_20_to_fp16 = const()[name = string("op_20_to_fp16"), val = fp16(0x1.bb8p+4)]; + tensor x_cast_fp16 = mul(x = output_cast_fp16, y = var_20_to_fp16)[name = string("x_cast_fp16")]; + int32 var_5152 = const()[name = string("op_5152"), val = int32(-1)]; + fp16 const_390_promoted_to_fp16 = const()[name = string("const_390_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5158_cast_fp16 = mul(x = x_cast_fp16, y = const_390_promoted_to_fp16)[name = string("op_5158_cast_fp16")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259_cast_fp16 = concat(axis = var_5152, interleave = input_259_interleave_0, values = (x_cast_fp16, var_5158_cast_fp16))[name = string("input_259_cast_fp16")]; + tensor normed_axes_0 = const()[name = string("normed_axes_0"), val = tensor([-1])]; + fp16 var_5155_to_fp16 = const()[name = string("op_5155_to_fp16"), val = fp16(0x1.5p-17)]; + tensor normed_cast_fp16 = layer_norm(axes = normed_axes_0, epsilon = var_5155_to_fp16, x = input_259_cast_fp16)[name = string("normed_cast_fp16")]; + tensor var_5163_split_sizes_0 = const()[name = string("op_5163_split_sizes_0"), val = tensor([768, 768])]; + int32 var_5163_axis_0 = const()[name = string("op_5163_axis_0"), val = int32(-1)]; + tensor var_5163_cast_fp16_0, tensor var_5163_cast_fp16_1 = split(axis = var_5163_axis_0, split_sizes = var_5163_split_sizes_0, x = normed_cast_fp16)[name = string("op_5163_cast_fp16")]; + tensor model_embed_vision_embedding_projection_weight_promoted_to_fp16 = const()[name = string("model_embed_vision_embedding_projection_weight_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(338289792)))]; + tensor linear_113_bias_0_to_fp16 = const()[name = string("linear_113_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342222016)))]; + tensor image_features = linear(bias = linear_113_bias_0_to_fp16, weight = model_embed_vision_embedding_projection_weight_promoted_to_fp16, x = var_5163_cast_fp16_0)[name = string("linear_113_cast_fp16")]; + } -> (image_features); +} \ No newline at end of file diff --git a/vision.ane.mlmodelc/weights/weight.bin b/vision.ane.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..faf9e7798e89a56ed0f28eaa6dd2b735bfe3a073 --- /dev/null +++ b/vision.ane.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:314f7354d81899b6b9481e9431ea27f9eae163a761e2edd8a53b36ef4ad73bf5 +size 342227200